Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(428)

Side by Side Diff: src/runtime/runtime-i18n.cc

Issue 1971943002: Make normalize, collator:compare and breakiterator a bit more efficient (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: use RUNTIME_ASSERT Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 5
6 #ifdef V8_I18N_SUPPORT 6 #ifdef V8_I18N_SUPPORT
7 #include "src/runtime/runtime-utils.h" 7 #include "src/runtime/runtime-utils.h"
8 8
9 #include "src/api.h" 9 #include "src/api.h"
10 #include "src/api-natives.h" 10 #include "src/api-natives.h"
11 #include "src/arguments.h" 11 #include "src/arguments.h"
12 #include "src/factory.h" 12 #include "src/factory.h"
13 #include "src/i18n.h" 13 #include "src/i18n.h"
14 #include "src/isolate-inl.h" 14 #include "src/isolate-inl.h"
15 #include "src/messages.h" 15 #include "src/messages.h"
16 16
17 #include "unicode/brkiter.h" 17 #include "unicode/brkiter.h"
18 #include "unicode/calendar.h" 18 #include "unicode/calendar.h"
19 #include "unicode/coll.h" 19 #include "unicode/coll.h"
20 #include "unicode/curramt.h" 20 #include "unicode/curramt.h"
21 #include "unicode/datefmt.h" 21 #include "unicode/datefmt.h"
22 #include "unicode/dcfmtsym.h" 22 #include "unicode/dcfmtsym.h"
23 #include "unicode/decimfmt.h" 23 #include "unicode/decimfmt.h"
24 #include "unicode/dtfmtsym.h" 24 #include "unicode/dtfmtsym.h"
25 #include "unicode/dtptngen.h" 25 #include "unicode/dtptngen.h"
26 #include "unicode/locid.h" 26 #include "unicode/locid.h"
27 #include "unicode/normalizer2.h"
27 #include "unicode/numfmt.h" 28 #include "unicode/numfmt.h"
28 #include "unicode/numsys.h" 29 #include "unicode/numsys.h"
29 #include "unicode/rbbi.h" 30 #include "unicode/rbbi.h"
30 #include "unicode/smpdtfmt.h" 31 #include "unicode/smpdtfmt.h"
31 #include "unicode/timezone.h" 32 #include "unicode/timezone.h"
32 #include "unicode/translit.h" 33 #include "unicode/translit.h"
33 #include "unicode/uchar.h" 34 #include "unicode/uchar.h"
34 #include "unicode/ucol.h" 35 #include "unicode/ucol.h"
35 #include "unicode/ucurr.h" 36 #include "unicode/ucurr.h"
36 #include "unicode/uloc.h" 37 #include "unicode/uloc.h"
37 #include "unicode/unistr.h" 38 #include "unicode/unistr.h"
38 #include "unicode/unum.h" 39 #include "unicode/unum.h"
39 #include "unicode/uversion.h" 40 #include "unicode/uversion.h"
40 41
41 42
42 namespace v8 { 43 namespace v8 {
43 namespace internal { 44 namespace internal {
45 namespace {
46
47 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
48 base::SmartArrayPointer<uc16>* dest,
49 int32_t length) {
50 DCHECK(flat.IsFlat());
51 if (flat.IsOneByte()) {
52 if (dest->is_empty()) {
53 dest->Reset(NewArray<uc16>(length));
54 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
55 }
56 return reinterpret_cast<const UChar*>(dest->get());
57 } else {
58 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
59 }
60 }
61
62 } // namespace
44 63
45 RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) { 64 RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
46 HandleScope scope(isolate); 65 HandleScope scope(isolate);
47 Factory* factory = isolate->factory(); 66 Factory* factory = isolate->factory();
48 67
49 DCHECK(args.length() == 1); 68 DCHECK(args.length() == 1);
50 CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0); 69 CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);
51 70
52 v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str)); 71 v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));
53 72
(...skipping 496 matching lines...) Expand 10 before | Expand all | Expand 10 after
550 569
551 DCHECK(args.length() == 3); 570 DCHECK(args.length() == 3);
552 571
553 CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0); 572 CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
554 CONVERT_ARG_HANDLE_CHECKED(String, string1, 1); 573 CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
555 CONVERT_ARG_HANDLE_CHECKED(String, string2, 2); 574 CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);
556 575
557 icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder); 576 icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder);
558 if (!collator) return isolate->ThrowIllegalOperation(); 577 if (!collator) return isolate->ThrowIllegalOperation();
559 578
560 v8::String::Value string_value1(v8::Utils::ToLocal(string1)); 579 string1 = String::Flatten(string1);
561 v8::String::Value string_value2(v8::Utils::ToLocal(string2)); 580 string2 = String::Flatten(string2);
562 const UChar* u_string1 = reinterpret_cast<const UChar*>(*string_value1); 581 DisallowHeapAllocation no_gc;
563 const UChar* u_string2 = reinterpret_cast<const UChar*>(*string_value2); 582 int32_t length1 = string1->length();
583 int32_t length2 = string2->length();
584 String::FlatContent flat1 = string1->GetFlatContent();
585 String::FlatContent flat2 = string2->GetFlatContent();
586 base::SmartArrayPointer<uc16> sap1;
587 base::SmartArrayPointer<uc16> sap2;
588 const UChar* string_val1 = GetUCharBufferFromFlat(flat1, &sap1, length1);
589 const UChar* string_val2 = GetUCharBufferFromFlat(flat2, &sap2, length2);
564 UErrorCode status = U_ZERO_ERROR; 590 UErrorCode status = U_ZERO_ERROR;
565 UCollationResult result = 591 UCollationResult result =
566 collator->compare(u_string1, string_value1.length(), u_string2, 592 collator->compare(string_val1, length1, string_val2, length2, status);
567 string_value2.length(), status);
568 if (U_FAILURE(status)) return isolate->ThrowIllegalOperation(); 593 if (U_FAILURE(status)) return isolate->ThrowIllegalOperation();
569 594
570 return *isolate->factory()->NewNumberFromInt(result); 595 return *isolate->factory()->NewNumberFromInt(result);
571 } 596 }
572 597
573 598
574 RUNTIME_FUNCTION(Runtime_StringNormalize) { 599 RUNTIME_FUNCTION(Runtime_StringNormalize) {
575 HandleScope scope(isolate); 600 HandleScope scope(isolate);
576 static const UNormalizationMode normalizationForms[] = { 601 static const struct {
577 UNORM_NFC, UNORM_NFD, UNORM_NFKC, UNORM_NFKD}; 602 const char* name;
603 UNormalization2Mode mode;
604 } normalizationForms[] = {
605 {"nfc", UNORM2_COMPOSE},
606 {"nfc", UNORM2_DECOMPOSE},
607 {"nfkc", UNORM2_COMPOSE},
608 {"nfkc", UNORM2_DECOMPOSE},
609 };
578 610
579 DCHECK(args.length() == 2); 611 DCHECK(args.length() == 2);
580 612
581 CONVERT_ARG_HANDLE_CHECKED(String, stringValue, 0); 613 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
582 CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]); 614 CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
583 RUNTIME_ASSERT(form_id >= 0 && 615 RUNTIME_ASSERT(form_id >= 0 &&
584 static_cast<size_t>(form_id) < arraysize(normalizationForms)); 616 static_cast<size_t>(form_id) < arraysize(normalizationForms));
585 617
586 v8::String::Value string_value(v8::Utils::ToLocal(stringValue)); 618 int length = s->length();
587 const UChar* u_value = reinterpret_cast<const UChar*>(*string_value); 619 s = String::Flatten(s);
620 icu::UnicodeString result;
621 base::SmartArrayPointer<uc16> sap;
622 UErrorCode status = U_ZERO_ERROR;
623 {
624 DisallowHeapAllocation no_gc;
625 String::FlatContent flat = s->GetFlatContent();
626 const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
627 icu::UnicodeString input(false, src, length);
628 // Getting a singleton. Should not free it.
629 const icu::Normalizer2* normalizer =
630 icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
631 normalizationForms[form_id].mode, status);
632 DCHECK(U_SUCCESS(status));
633 RUNTIME_ASSERT(normalizer != nullptr);
634 int32_t normalized_prefix_length =
635 normalizer->spanQuickCheckYes(input, status);
636 // Quick return if the input is already normalized.
637 if (length == normalized_prefix_length) return *s;
638 icu::UnicodeString unnormalized =
639 input.tempSubString(normalized_prefix_length);
640 // Read-only alias of the normalized prefix.
641 result.setTo(false, input.getBuffer(), normalized_prefix_length);
642 // copy-on-write; normalize the suffix and append to |result|.
643 normalizer->normalizeSecondAndAppend(result, unnormalized, status);
644 }
588 645
589 // TODO(mnita): check Normalizer2 (not available in ICU 46)
590 UErrorCode status = U_ZERO_ERROR;
591 icu::UnicodeString input(false, u_value, string_value.length());
592 icu::UnicodeString result;
593 icu::Normalizer::normalize(input, normalizationForms[form_id], 0, result,
594 status);
595 if (U_FAILURE(status)) { 646 if (U_FAILURE(status)) {
596 return isolate->heap()->undefined_value(); 647 return isolate->heap()->undefined_value();
597 } 648 }
598 649
599 Handle<String> result_str; 650 Handle<String> result_str;
600 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 651 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
601 isolate, result_str, 652 isolate, result_str,
602 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( 653 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
603 reinterpret_cast<const uint16_t*>(result.getBuffer()), 654 reinterpret_cast<const uint16_t*>(result.getBuffer()),
604 result.length()))); 655 result.length())));
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
658 CONVERT_ARG_HANDLE_CHECKED(String, text, 1); 709 CONVERT_ARG_HANDLE_CHECKED(String, text, 1);
659 710
660 icu::BreakIterator* break_iterator = 711 icu::BreakIterator* break_iterator =
661 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); 712 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
662 if (!break_iterator) return isolate->ThrowIllegalOperation(); 713 if (!break_iterator) return isolate->ThrowIllegalOperation();
663 714
664 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>( 715 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>(
665 break_iterator_holder->GetInternalField(1)); 716 break_iterator_holder->GetInternalField(1));
666 delete u_text; 717 delete u_text;
667 718
668 v8::String::Value text_value(v8::Utils::ToLocal(text)); 719 int length = text->length();
669 u_text = new icu::UnicodeString(reinterpret_cast<const UChar*>(*text_value), 720 text = String::Flatten(text);
670 text_value.length()); 721 DisallowHeapAllocation no_gc;
722 String::FlatContent flat = text->GetFlatContent();
723 base::SmartArrayPointer<uc16> sap;
724 const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length);
725 u_text = new icu::UnicodeString(text_value, length);
671 break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text)); 726 break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text));
672 727
673 break_iterator->setText(*u_text); 728 break_iterator->setText(*u_text);
674 729
675 return isolate->heap()->undefined_value(); 730 return isolate->heap()->undefined_value();
676 } 731 }
677 732
678 733
679 RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) { 734 RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
680 HandleScope scope(isolate); 735 HandleScope scope(isolate);
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
757 const char* transliterator_id) { 812 const char* transliterator_id) {
758 UErrorCode status = U_ZERO_ERROR; 813 UErrorCode status = U_ZERO_ERROR;
759 base::SmartPointer<icu::Transliterator> translit( 814 base::SmartPointer<icu::Transliterator> translit(
760 icu::Transliterator::createInstance( 815 icu::Transliterator::createInstance(
761 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD, 816 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD,
762 status)); 817 status));
763 if (U_FAILURE(status)) return; 818 if (U_FAILURE(status)) return;
764 translit->transliterate(*input); 819 translit->transliterate(*input);
765 } 820 }
766 821
767 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
768 base::SmartArrayPointer<uc16>* dest,
769 int32_t length) {
770 DCHECK(flat.IsFlat());
771 if (flat.IsOneByte()) {
772 if (dest->is_empty()) {
773 dest->Reset(NewArray<uc16>(length));
774 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
775 }
776 return reinterpret_cast<const UChar*>(dest->get());
777 } else {
778 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
779 }
780 }
781
782 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, 822 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
783 bool is_to_upper, const char* lang) { 823 bool is_to_upper, const char* lang) {
784 int32_t src_length = s->length(); 824 int32_t src_length = s->length();
785 825
786 // Greek uppercasing has to be done via transliteration. 826 // Greek uppercasing has to be done via transliteration.
787 // TODO(jshin): Drop this special-casing once ICU's regular case conversion 827 // TODO(jshin): Drop this special-casing once ICU's regular case conversion
788 // API supports Greek uppercasing. See 828 // API supports Greek uppercasing. See
789 // http://bugs.icu-project.org/trac/ticket/10582 . 829 // http://bugs.icu-project.org/trac/ticket/10582 .
790 // In the meantime, if there's no Greek character in |s|, call this 830 // In the meantime, if there's no Greek character in |s|, call this
791 // function again with the root locale (lang=""). 831 // function again with the root locale (lang="").
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after
1102 // mapping of ASCII range characters are different in those locales. 1142 // mapping of ASCII range characters are different in those locales.
1103 // Greek (el) does not require any adjustment, though. 1143 // Greek (el) does not require any adjustment, though.
1104 return LocaleConvertCase(s, isolate, is_upper, 1144 return LocaleConvertCase(s, isolate, is_upper,
1105 reinterpret_cast<const char*>(lang_str)); 1145 reinterpret_cast<const char*>(lang_str));
1106 } 1146 }
1107 1147
1108 } // namespace internal 1148 } // namespace internal
1109 } // namespace v8 1149 } // namespace v8
1110 1150
1111 #endif // V8_I18N_SUPPORT 1151 #endif // V8_I18N_SUPPORT
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698