OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 #ifdef V8_I18N_SUPPORT | 6 #ifdef V8_I18N_SUPPORT |
7 #include "src/runtime/runtime-utils.h" | 7 #include "src/runtime/runtime-utils.h" |
8 | 8 |
9 #include "src/api.h" | 9 #include "src/api.h" |
10 #include "src/api-natives.h" | 10 #include "src/api-natives.h" |
11 #include "src/arguments.h" | 11 #include "src/arguments.h" |
12 #include "src/factory.h" | 12 #include "src/factory.h" |
13 #include "src/i18n.h" | 13 #include "src/i18n.h" |
14 #include "src/isolate-inl.h" | 14 #include "src/isolate-inl.h" |
15 #include "src/messages.h" | 15 #include "src/messages.h" |
16 | 16 |
17 #include "unicode/brkiter.h" | 17 #include "unicode/brkiter.h" |
18 #include "unicode/calendar.h" | 18 #include "unicode/calendar.h" |
19 #include "unicode/coll.h" | 19 #include "unicode/coll.h" |
20 #include "unicode/curramt.h" | 20 #include "unicode/curramt.h" |
21 #include "unicode/datefmt.h" | 21 #include "unicode/datefmt.h" |
22 #include "unicode/dcfmtsym.h" | 22 #include "unicode/dcfmtsym.h" |
23 #include "unicode/decimfmt.h" | 23 #include "unicode/decimfmt.h" |
24 #include "unicode/dtfmtsym.h" | 24 #include "unicode/dtfmtsym.h" |
25 #include "unicode/dtptngen.h" | 25 #include "unicode/dtptngen.h" |
26 #include "unicode/locid.h" | 26 #include "unicode/locid.h" |
| 27 #include "unicode/normalizer2.h" |
27 #include "unicode/numfmt.h" | 28 #include "unicode/numfmt.h" |
28 #include "unicode/numsys.h" | 29 #include "unicode/numsys.h" |
29 #include "unicode/rbbi.h" | 30 #include "unicode/rbbi.h" |
30 #include "unicode/smpdtfmt.h" | 31 #include "unicode/smpdtfmt.h" |
31 #include "unicode/timezone.h" | 32 #include "unicode/timezone.h" |
32 #include "unicode/translit.h" | 33 #include "unicode/translit.h" |
33 #include "unicode/uchar.h" | 34 #include "unicode/uchar.h" |
34 #include "unicode/ucol.h" | 35 #include "unicode/ucol.h" |
35 #include "unicode/ucurr.h" | 36 #include "unicode/ucurr.h" |
36 #include "unicode/uloc.h" | 37 #include "unicode/uloc.h" |
37 #include "unicode/unistr.h" | 38 #include "unicode/unistr.h" |
38 #include "unicode/unum.h" | 39 #include "unicode/unum.h" |
39 #include "unicode/uversion.h" | 40 #include "unicode/uversion.h" |
40 | 41 |
41 | 42 |
42 namespace v8 { | 43 namespace v8 { |
43 namespace internal { | 44 namespace internal { |
| 45 namespace { |
| 46 |
| 47 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, |
| 48 base::SmartArrayPointer<uc16>* dest, |
| 49 int32_t length) { |
| 50 DCHECK(flat.IsFlat()); |
| 51 if (flat.IsOneByte()) { |
| 52 if (dest->is_empty()) { |
| 53 dest->Reset(NewArray<uc16>(length)); |
| 54 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); |
| 55 } |
| 56 return reinterpret_cast<const UChar*>(dest->get()); |
| 57 } else { |
| 58 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); |
| 59 } |
| 60 } |
| 61 |
| 62 } // namespace |
44 | 63 |
45 RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) { | 64 RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) { |
46 HandleScope scope(isolate); | 65 HandleScope scope(isolate); |
47 Factory* factory = isolate->factory(); | 66 Factory* factory = isolate->factory(); |
48 | 67 |
49 DCHECK(args.length() == 1); | 68 DCHECK(args.length() == 1); |
50 CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0); | 69 CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0); |
51 | 70 |
52 v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str)); | 71 v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str)); |
53 | 72 |
(...skipping 496 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
550 | 569 |
551 DCHECK(args.length() == 3); | 570 DCHECK(args.length() == 3); |
552 | 571 |
553 CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0); | 572 CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0); |
554 CONVERT_ARG_HANDLE_CHECKED(String, string1, 1); | 573 CONVERT_ARG_HANDLE_CHECKED(String, string1, 1); |
555 CONVERT_ARG_HANDLE_CHECKED(String, string2, 2); | 574 CONVERT_ARG_HANDLE_CHECKED(String, string2, 2); |
556 | 575 |
557 icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder); | 576 icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder); |
558 if (!collator) return isolate->ThrowIllegalOperation(); | 577 if (!collator) return isolate->ThrowIllegalOperation(); |
559 | 578 |
560 v8::String::Value string_value1(v8::Utils::ToLocal(string1)); | 579 string1 = String::Flatten(string1); |
561 v8::String::Value string_value2(v8::Utils::ToLocal(string2)); | 580 string2 = String::Flatten(string2); |
562 const UChar* u_string1 = reinterpret_cast<const UChar*>(*string_value1); | 581 DisallowHeapAllocation no_gc; |
563 const UChar* u_string2 = reinterpret_cast<const UChar*>(*string_value2); | 582 int32_t length1 = string1->length(); |
| 583 int32_t length2 = string2->length(); |
| 584 String::FlatContent flat1 = string1->GetFlatContent(); |
| 585 String::FlatContent flat2 = string2->GetFlatContent(); |
| 586 base::SmartArrayPointer<uc16> sap1; |
| 587 base::SmartArrayPointer<uc16> sap2; |
| 588 const UChar* string_val1 = GetUCharBufferFromFlat(flat1, &sap1, length1); |
| 589 const UChar* string_val2 = GetUCharBufferFromFlat(flat2, &sap2, length2); |
564 UErrorCode status = U_ZERO_ERROR; | 590 UErrorCode status = U_ZERO_ERROR; |
565 UCollationResult result = | 591 UCollationResult result = |
566 collator->compare(u_string1, string_value1.length(), u_string2, | 592 collator->compare(string_val1, length1, string_val2, length2, status); |
567 string_value2.length(), status); | |
568 if (U_FAILURE(status)) return isolate->ThrowIllegalOperation(); | 593 if (U_FAILURE(status)) return isolate->ThrowIllegalOperation(); |
569 | 594 |
570 return *isolate->factory()->NewNumberFromInt(result); | 595 return *isolate->factory()->NewNumberFromInt(result); |
571 } | 596 } |
572 | 597 |
573 | 598 |
574 RUNTIME_FUNCTION(Runtime_StringNormalize) { | 599 RUNTIME_FUNCTION(Runtime_StringNormalize) { |
575 HandleScope scope(isolate); | 600 HandleScope scope(isolate); |
576 static const UNormalizationMode normalizationForms[] = { | 601 static const struct { |
577 UNORM_NFC, UNORM_NFD, UNORM_NFKC, UNORM_NFKD}; | 602 const char* name; |
| 603 UNormalization2Mode mode; |
| 604 } normalizationForms[] = { |
| 605 {"nfc", UNORM2_COMPOSE}, |
| 606 {"nfc", UNORM2_DECOMPOSE}, |
| 607 {"nfkc", UNORM2_COMPOSE}, |
| 608 {"nfkc", UNORM2_DECOMPOSE}, |
| 609 }; |
578 | 610 |
579 DCHECK(args.length() == 2); | 611 DCHECK(args.length() == 2); |
580 | 612 |
581 CONVERT_ARG_HANDLE_CHECKED(String, stringValue, 0); | 613 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
582 CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]); | 614 CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]); |
583 RUNTIME_ASSERT(form_id >= 0 && | 615 RUNTIME_ASSERT(form_id >= 0 && |
584 static_cast<size_t>(form_id) < arraysize(normalizationForms)); | 616 static_cast<size_t>(form_id) < arraysize(normalizationForms)); |
585 | 617 |
586 v8::String::Value string_value(v8::Utils::ToLocal(stringValue)); | 618 int length = s->length(); |
587 const UChar* u_value = reinterpret_cast<const UChar*>(*string_value); | 619 s = String::Flatten(s); |
| 620 icu::UnicodeString result; |
| 621 base::SmartArrayPointer<uc16> sap; |
| 622 UErrorCode status = U_ZERO_ERROR; |
| 623 { |
| 624 DisallowHeapAllocation no_gc; |
| 625 String::FlatContent flat = s->GetFlatContent(); |
| 626 const UChar* src = GetUCharBufferFromFlat(flat, &sap, length); |
| 627 icu::UnicodeString input(false, src, length); |
| 628 // Getting a singleton. Should not free it. |
| 629 const icu::Normalizer2* normalizer = |
| 630 icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name, |
| 631 normalizationForms[form_id].mode, status); |
| 632 DCHECK(U_SUCCESS(status)); |
| 633 RUNTIME_ASSERT(normalizer != nullptr); |
| 634 int32_t normalized_prefix_length = |
| 635 normalizer->spanQuickCheckYes(input, status); |
| 636 // Quick return if the input is already normalized. |
| 637 if (length == normalized_prefix_length) return *s; |
| 638 icu::UnicodeString unnormalized = |
| 639 input.tempSubString(normalized_prefix_length); |
| 640 // Read-only alias of the normalized prefix. |
| 641 result.setTo(false, input.getBuffer(), normalized_prefix_length); |
| 642 // copy-on-write; normalize the suffix and append to |result|. |
| 643 normalizer->normalizeSecondAndAppend(result, unnormalized, status); |
| 644 } |
588 | 645 |
589 // TODO(mnita): check Normalizer2 (not available in ICU 46) | |
590 UErrorCode status = U_ZERO_ERROR; | |
591 icu::UnicodeString input(false, u_value, string_value.length()); | |
592 icu::UnicodeString result; | |
593 icu::Normalizer::normalize(input, normalizationForms[form_id], 0, result, | |
594 status); | |
595 if (U_FAILURE(status)) { | 646 if (U_FAILURE(status)) { |
596 return isolate->heap()->undefined_value(); | 647 return isolate->heap()->undefined_value(); |
597 } | 648 } |
598 | 649 |
599 Handle<String> result_str; | 650 Handle<String> result_str; |
600 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 651 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
601 isolate, result_str, | 652 isolate, result_str, |
602 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( | 653 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
603 reinterpret_cast<const uint16_t*>(result.getBuffer()), | 654 reinterpret_cast<const uint16_t*>(result.getBuffer()), |
604 result.length()))); | 655 result.length()))); |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
658 CONVERT_ARG_HANDLE_CHECKED(String, text, 1); | 709 CONVERT_ARG_HANDLE_CHECKED(String, text, 1); |
659 | 710 |
660 icu::BreakIterator* break_iterator = | 711 icu::BreakIterator* break_iterator = |
661 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); | 712 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); |
662 if (!break_iterator) return isolate->ThrowIllegalOperation(); | 713 if (!break_iterator) return isolate->ThrowIllegalOperation(); |
663 | 714 |
664 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>( | 715 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>( |
665 break_iterator_holder->GetInternalField(1)); | 716 break_iterator_holder->GetInternalField(1)); |
666 delete u_text; | 717 delete u_text; |
667 | 718 |
668 v8::String::Value text_value(v8::Utils::ToLocal(text)); | 719 int length = text->length(); |
669 u_text = new icu::UnicodeString(reinterpret_cast<const UChar*>(*text_value), | 720 text = String::Flatten(text); |
670 text_value.length()); | 721 DisallowHeapAllocation no_gc; |
| 722 String::FlatContent flat = text->GetFlatContent(); |
| 723 base::SmartArrayPointer<uc16> sap; |
| 724 const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length); |
| 725 u_text = new icu::UnicodeString(text_value, length); |
671 break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text)); | 726 break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text)); |
672 | 727 |
673 break_iterator->setText(*u_text); | 728 break_iterator->setText(*u_text); |
674 | 729 |
675 return isolate->heap()->undefined_value(); | 730 return isolate->heap()->undefined_value(); |
676 } | 731 } |
677 | 732 |
678 | 733 |
679 RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) { | 734 RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) { |
680 HandleScope scope(isolate); | 735 HandleScope scope(isolate); |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
757 const char* transliterator_id) { | 812 const char* transliterator_id) { |
758 UErrorCode status = U_ZERO_ERROR; | 813 UErrorCode status = U_ZERO_ERROR; |
759 base::SmartPointer<icu::Transliterator> translit( | 814 base::SmartPointer<icu::Transliterator> translit( |
760 icu::Transliterator::createInstance( | 815 icu::Transliterator::createInstance( |
761 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD, | 816 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD, |
762 status)); | 817 status)); |
763 if (U_FAILURE(status)) return; | 818 if (U_FAILURE(status)) return; |
764 translit->transliterate(*input); | 819 translit->transliterate(*input); |
765 } | 820 } |
766 | 821 |
767 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, | |
768 base::SmartArrayPointer<uc16>* dest, | |
769 int32_t length) { | |
770 DCHECK(flat.IsFlat()); | |
771 if (flat.IsOneByte()) { | |
772 if (dest->is_empty()) { | |
773 dest->Reset(NewArray<uc16>(length)); | |
774 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); | |
775 } | |
776 return reinterpret_cast<const UChar*>(dest->get()); | |
777 } else { | |
778 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); | |
779 } | |
780 } | |
781 | |
782 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, | 822 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, |
783 bool is_to_upper, const char* lang) { | 823 bool is_to_upper, const char* lang) { |
784 int32_t src_length = s->length(); | 824 int32_t src_length = s->length(); |
785 | 825 |
786 // Greek uppercasing has to be done via transliteration. | 826 // Greek uppercasing has to be done via transliteration. |
787 // TODO(jshin): Drop this special-casing once ICU's regular case conversion | 827 // TODO(jshin): Drop this special-casing once ICU's regular case conversion |
788 // API supports Greek uppercasing. See | 828 // API supports Greek uppercasing. See |
789 // http://bugs.icu-project.org/trac/ticket/10582 . | 829 // http://bugs.icu-project.org/trac/ticket/10582 . |
790 // In the meantime, if there's no Greek character in |s|, call this | 830 // In the meantime, if there's no Greek character in |s|, call this |
791 // function again with the root locale (lang=""). | 831 // function again with the root locale (lang=""). |
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1102 // mapping of ASCII range characters are different in those locales. | 1142 // mapping of ASCII range characters are different in those locales. |
1103 // Greek (el) does not require any adjustment, though. | 1143 // Greek (el) does not require any adjustment, though. |
1104 return LocaleConvertCase(s, isolate, is_upper, | 1144 return LocaleConvertCase(s, isolate, is_upper, |
1105 reinterpret_cast<const char*>(lang_str)); | 1145 reinterpret_cast<const char*>(lang_str)); |
1106 } | 1146 } |
1107 | 1147 |
1108 } // namespace internal | 1148 } // namespace internal |
1109 } // namespace v8 | 1149 } // namespace v8 |
1110 | 1150 |
1111 #endif // V8_I18N_SUPPORT | 1151 #endif // V8_I18N_SUPPORT |
OLD | NEW |