Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(247)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 2514333002: [regexp] use C++ API for unicode set over C API. (Closed)
Patch Set: addressed comment Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/regexp/regexp-parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include <memory> 7 #include <memory>
8 8
9 #include "src/base/platform/platform.h" 9 #include "src/base/platform/platform.h"
10 #include "src/compilation-cache.h" 10 #include "src/compilation-cache.h"
11 #include "src/elements.h" 11 #include "src/elements.h"
12 #include "src/execution.h" 12 #include "src/execution.h"
13 #include "src/factory.h" 13 #include "src/factory.h"
14 #include "src/isolate-inl.h" 14 #include "src/isolate-inl.h"
15 #include "src/messages.h" 15 #include "src/messages.h"
16 #include "src/ostreams.h" 16 #include "src/ostreams.h"
17 #include "src/regexp/interpreter-irregexp.h" 17 #include "src/regexp/interpreter-irregexp.h"
18 #include "src/regexp/jsregexp-inl.h" 18 #include "src/regexp/jsregexp-inl.h"
19 #include "src/regexp/regexp-macro-assembler-irregexp.h" 19 #include "src/regexp/regexp-macro-assembler-irregexp.h"
20 #include "src/regexp/regexp-macro-assembler-tracer.h" 20 #include "src/regexp/regexp-macro-assembler-tracer.h"
21 #include "src/regexp/regexp-macro-assembler.h" 21 #include "src/regexp/regexp-macro-assembler.h"
22 #include "src/regexp/regexp-parser.h" 22 #include "src/regexp/regexp-parser.h"
23 #include "src/regexp/regexp-stack.h" 23 #include "src/regexp/regexp-stack.h"
24 #include "src/runtime/runtime.h" 24 #include "src/runtime/runtime.h"
25 #include "src/splay-tree-inl.h" 25 #include "src/splay-tree-inl.h"
26 #include "src/string-search.h" 26 #include "src/string-search.h"
27 #include "src/unicode-decoder.h" 27 #include "src/unicode-decoder.h"
28 28
29 #ifdef V8_I18N_SUPPORT 29 #ifdef V8_I18N_SUPPORT
30 #include "unicode/uset.h" 30 #include "unicode/uniset.h"
31 #include "unicode/utypes.h" 31 #include "unicode/utypes.h"
32 #endif // V8_I18N_SUPPORT 32 #endif // V8_I18N_SUPPORT
33 33
34 #ifndef V8_INTERPRETED_REGEXP 34 #ifndef V8_INTERPRETED_REGEXP
35 #if V8_TARGET_ARCH_IA32 35 #if V8_TARGET_ARCH_IA32
36 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h" 36 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
37 #elif V8_TARGET_ARCH_X64 37 #elif V8_TARGET_ARCH_X64
38 #include "src/regexp/x64/regexp-macro-assembler-x64.h" 38 #include "src/regexp/x64/regexp-macro-assembler-x64.h"
39 #elif V8_TARGET_ARCH_ARM64 39 #elif V8_TARGET_ARCH_ARM64
40 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h" 40 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
(...skipping 5066 matching lines...) Expand 10 before | Expand all | Expand 10 after
5107 return TextNode::CreateForCharacterRanges(zone, range, false, on_success); 5107 return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
5108 } 5108 }
5109 5109
5110 5110
5111 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, 5111 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
5112 ZoneList<CharacterRange>* ranges) { 5112 ZoneList<CharacterRange>* ranges) {
5113 #ifdef V8_I18N_SUPPORT 5113 #ifdef V8_I18N_SUPPORT
5114 // Use ICU to compute the case fold closure over the ranges. 5114 // Use ICU to compute the case fold closure over the ranges.
5115 DCHECK(compiler->unicode()); 5115 DCHECK(compiler->unicode());
5116 DCHECK(compiler->ignore_case()); 5116 DCHECK(compiler->ignore_case());
5117 USet* set = uset_openEmpty(); 5117 icu::UnicodeSet set;
5118 for (int i = 0; i < ranges->length(); i++) { 5118 for (int i = 0; i < ranges->length(); i++) {
5119 uset_addRange(set, ranges->at(i).from(), ranges->at(i).to()); 5119 set.add(ranges->at(i).from(), ranges->at(i).to());
5120 } 5120 }
5121 ranges->Clear(); 5121 ranges->Clear();
5122 uset_closeOver(set, USET_CASE_INSENSITIVE); 5122 set.closeOver(USET_CASE_INSENSITIVE);
5123 // Full case mapping maps single characters to multiple characters. 5123 // Full case mapping maps single characters to multiple characters.
5124 // Those are represented as strings in the set. Remove them so that 5124 // Those are represented as strings in the set. Remove them so that
5125 // we end up with only simple and common case mappings. 5125 // we end up with only simple and common case mappings.
5126 uset_removeAllStrings(set); 5126 set.removeAllStrings();
5127 int item_count = uset_getItemCount(set);
5128 int item_result = 0;
5129 UErrorCode ec = U_ZERO_ERROR;
5130 Zone* zone = compiler->zone(); 5127 Zone* zone = compiler->zone();
5131 for (int i = 0; i < item_count; i++) { 5128 for (int i = 0; i < set.getRangeCount(); i++) {
5132 uc32 start = 0; 5129 ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
5133 uc32 end = 0; 5130 zone);
5134 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
5135 ranges->Add(CharacterRange::Range(start, end), zone);
5136 } 5131 }
5137 // No errors and everything we collected have been ranges. 5132 // No errors and everything we collected have been ranges.
5138 DCHECK_EQ(U_ZERO_ERROR, ec);
5139 DCHECK_EQ(0, item_result);
5140 uset_close(set);
5141 #else 5133 #else
5142 // Fallback if ICU is not included. 5134 // Fallback if ICU is not included.
5143 CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(), 5135 CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(),
5144 ranges, compiler->one_byte()); 5136 ranges, compiler->one_byte());
5145 #endif // V8_I18N_SUPPORT 5137 #endif // V8_I18N_SUPPORT
5146 CharacterRange::Canonicalize(ranges); 5138 CharacterRange::Canonicalize(ranges);
5147 } 5139 }
5148 5140
5149 5141
5150 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 5142 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
(...skipping 1719 matching lines...) Expand 10 before | Expand all | Expand 10 after
6870 6862
6871 6863
6872 void RegExpResultsCache::Clear(FixedArray* cache) { 6864 void RegExpResultsCache::Clear(FixedArray* cache) {
6873 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6865 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6874 cache->set(i, Smi::kZero); 6866 cache->set(i, Smi::kZero);
6875 } 6867 }
6876 } 6868 }
6877 6869
6878 } // namespace internal 6870 } // namespace internal
6879 } // namespace v8 6871 } // namespace v8
OLD NEW
« no previous file with comments | « no previous file | src/regexp/regexp-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698