Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(401)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1578253005: [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: more tests Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-ast.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast/ast.h" 7 #include "src/ast/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
65 Handle<String> error_text) { 65 Handle<String> error_text) {
66 USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text)); 66 USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text));
67 } 67 }
68 68
69 69
70 ContainedInLattice AddRange(ContainedInLattice containment, 70 ContainedInLattice AddRange(ContainedInLattice containment,
71 const int* ranges, 71 const int* ranges,
72 int ranges_length, 72 int ranges_length,
73 Interval new_range) { 73 Interval new_range) {
74 DCHECK((ranges_length & 1) == 1); 74 DCHECK((ranges_length & 1) == 1);
75 DCHECK(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1); 75 DCHECK(ranges[ranges_length - 1] == String::kMaxCodePoint + 1);
76 if (containment == kLatticeUnknown) return containment; 76 if (containment == kLatticeUnknown) return containment;
77 bool inside = false; 77 bool inside = false;
78 int last = 0; 78 int last = 0;
79 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) { 79 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
80 // Consider the range from last to ranges[i]. 80 // Consider the range from last to ranges[i].
81 // We haven't got to the new range yet. 81 // We haven't got to the new range yet.
82 if (ranges[i] <= new_range.from()) continue; 82 if (ranges[i] <= new_range.from()) continue;
83 // New range is wholly inside last-ranges[i]. Note that new_range.to() is 83 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
84 // inclusive, but the values in ranges are not. 84 // inclusive, but the values in ranges are not.
85 if (last <= new_range.from() && new_range.to() < ranges[i]) { 85 if (last <= new_range.from() && new_range.to() < ranges[i]) {
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 138
139 Handle<Object> result; 139 Handle<Object> result;
140 if (in_cache) { 140 if (in_cache) {
141 re->set_data(*cached); 141 re->set_data(*cached);
142 return re; 142 return re;
143 } 143 }
144 pattern = String::Flatten(pattern); 144 pattern = String::Flatten(pattern);
145 PostponeInterruptsScope postpone(isolate); 145 PostponeInterruptsScope postpone(isolate);
146 RegExpCompileData parse_result; 146 RegExpCompileData parse_result;
147 FlatStringReader reader(isolate, pattern); 147 FlatStringReader reader(isolate, pattern);
148 if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader, 148 if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader, flags,
149 flags & JSRegExp::kMultiline, 149 &parse_result)) {
150 flags & JSRegExp::kUnicode, &parse_result)) {
151 // Throw an exception if we fail to parse the pattern. 150 // Throw an exception if we fail to parse the pattern.
152 return ThrowRegExpException(re, pattern, parse_result.error); 151 return ThrowRegExpException(re, pattern, parse_result.error);
153 } 152 }
154 153
155 bool has_been_compiled = false; 154 bool has_been_compiled = false;
156 155
157 if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) && 156 if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&
158 !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) { 157 !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {
159 // Parse-tree is a single atom that is equal to the pattern. 158 // Parse-tree is a single atom that is equal to the pattern.
160 AtomCompile(re, pattern, flags, pattern); 159 AtomCompile(re, pattern, flags, pattern);
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
364 ThrowRegExpException(re, error_message); 363 ThrowRegExpException(re, error_message);
365 return false; 364 return false;
366 } 365 }
367 366
368 JSRegExp::Flags flags = re->GetFlags(); 367 JSRegExp::Flags flags = re->GetFlags();
369 368
370 Handle<String> pattern(re->Pattern()); 369 Handle<String> pattern(re->Pattern());
371 pattern = String::Flatten(pattern); 370 pattern = String::Flatten(pattern);
372 RegExpCompileData compile_data; 371 RegExpCompileData compile_data;
373 FlatStringReader reader(isolate, pattern); 372 FlatStringReader reader(isolate, pattern);
374 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, 373 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
375 flags & JSRegExp::kMultiline, 374 &compile_data)) {
376 flags & JSRegExp::kUnicode, &compile_data)) {
377 // Throw an exception if we fail to parse the pattern. 375 // Throw an exception if we fail to parse the pattern.
378 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 376 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
379 USE(ThrowRegExpException(re, pattern, compile_data.error)); 377 USE(ThrowRegExpException(re, pattern, compile_data.error));
380 return false; 378 return false;
381 } 379 }
382 RegExpEngine::CompilationResult result = RegExpEngine::Compile( 380 RegExpEngine::CompilationResult result =
383 isolate, &zone, &compile_data, flags & JSRegExp::kIgnoreCase, 381 RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern,
384 flags & JSRegExp::kGlobal, flags & JSRegExp::kMultiline, 382 sample_subject, is_one_byte);
385 flags & JSRegExp::kSticky, pattern, sample_subject, is_one_byte);
386 if (result.error_message != NULL) { 383 if (result.error_message != NULL) {
387 // Unable to compile regexp. 384 // Unable to compile regexp.
388 Handle<String> error_message = isolate->factory()->NewStringFromUtf8( 385 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
389 CStrVector(result.error_message)).ToHandleChecked(); 386 CStrVector(result.error_message)).ToHandleChecked();
390 ThrowRegExpException(re, error_message); 387 ThrowRegExpException(re, error_message);
391 return false; 388 return false;
392 } 389 }
393 390
394 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); 391 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
395 data->set(JSRegExp::code_index(is_one_byte), result.code); 392 data->set(JSRegExp::code_index(is_one_byte), result.code);
(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after
938 935
939 private: 936 private:
940 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 937 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
941 int total_samples_; 938 int total_samples_;
942 }; 939 };
943 940
944 941
945 class RegExpCompiler { 942 class RegExpCompiler {
946 public: 943 public:
947 RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 944 RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
948 bool ignore_case, bool is_one_byte); 945 JSRegExp::Flags flags, bool is_one_byte);
949 946
950 int AllocateRegister() { 947 int AllocateRegister() {
951 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 948 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
952 reg_exp_too_big_ = true; 949 reg_exp_too_big_ = true;
953 return next_register_; 950 return next_register_;
954 } 951 }
955 return next_register_++; 952 return next_register_++;
956 } 953 }
957 954
955 // Lookarounds to match lone surrogates for unicode character class matches
956 // are never nested. We can therefore reuse registers.
957 int UnicodeLookaroundStackRegister() {
958 if (unicode_lookaround_stack_register_ == kNoRegister) {
959 unicode_lookaround_stack_register_ = AllocateRegister();
960 }
961 return unicode_lookaround_stack_register_;
962 }
963
964 int UnicodeLookaroundPositionRegister() {
965 if (unicode_lookaround_position_register_ == kNoRegister) {
966 unicode_lookaround_position_register_ = AllocateRegister();
967 }
968 return unicode_lookaround_position_register_;
969 }
970
958 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, 971 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
959 RegExpNode* start, 972 RegExpNode* start,
960 int capture_count, 973 int capture_count,
961 Handle<String> pattern); 974 Handle<String> pattern);
962 975
963 inline void AddWork(RegExpNode* node) { 976 inline void AddWork(RegExpNode* node) {
964 if (!node->on_work_list() && !node->label()->is_bound()) { 977 if (!node->on_work_list() && !node->label()->is_bound()) {
965 node->set_on_work_list(true); 978 node->set_on_work_list(true);
966 work_list_->Add(node); 979 work_list_->Add(node);
967 } 980 }
968 } 981 }
969 982
970 static const int kImplementationOffset = 0; 983 static const int kImplementationOffset = 0;
971 static const int kNumberOfRegistersOffset = 0; 984 static const int kNumberOfRegistersOffset = 0;
972 static const int kCodeOffset = 1; 985 static const int kCodeOffset = 1;
973 986
974 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 987 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
975 EndNode* accept() { return accept_; } 988 EndNode* accept() { return accept_; }
976 989
977 static const int kMaxRecursion = 100; 990 static const int kMaxRecursion = 100;
978 inline int recursion_depth() { return recursion_depth_; } 991 inline int recursion_depth() { return recursion_depth_; }
979 inline void IncrementRecursionDepth() { recursion_depth_++; } 992 inline void IncrementRecursionDepth() { recursion_depth_++; }
980 inline void DecrementRecursionDepth() { recursion_depth_--; } 993 inline void DecrementRecursionDepth() { recursion_depth_--; }
981 994
982 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 995 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
983 996
984 inline bool ignore_case() { return ignore_case_; } 997 inline bool ignore_case() { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
998 inline bool unicode() { return (flags_ & JSRegExp::kUnicode) != 0; }
985 inline bool one_byte() { return one_byte_; } 999 inline bool one_byte() { return one_byte_; }
986 inline bool optimize() { return optimize_; } 1000 inline bool optimize() { return optimize_; }
987 inline void set_optimize(bool value) { optimize_ = value; } 1001 inline void set_optimize(bool value) { optimize_ = value; }
988 inline bool limiting_recursion() { return limiting_recursion_; } 1002 inline bool limiting_recursion() { return limiting_recursion_; }
989 inline void set_limiting_recursion(bool value) { 1003 inline void set_limiting_recursion(bool value) {
990 limiting_recursion_ = value; 1004 limiting_recursion_ = value;
991 } 1005 }
992 bool read_backward() { return read_backward_; } 1006 bool read_backward() { return read_backward_; }
993 void set_read_backward(bool value) { read_backward_ = value; } 1007 void set_read_backward(bool value) { read_backward_ = value; }
994 FrequencyCollator* frequency_collator() { return &frequency_collator_; } 1008 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
995 1009
996 int current_expansion_factor() { return current_expansion_factor_; } 1010 int current_expansion_factor() { return current_expansion_factor_; }
997 void set_current_expansion_factor(int value) { 1011 void set_current_expansion_factor(int value) {
998 current_expansion_factor_ = value; 1012 current_expansion_factor_ = value;
999 } 1013 }
1000 1014
1001 Isolate* isolate() const { return isolate_; } 1015 Isolate* isolate() const { return isolate_; }
1002 Zone* zone() const { return zone_; } 1016 Zone* zone() const { return zone_; }
1003 1017
1004 static const int kNoRegister = -1; 1018 static const int kNoRegister = -1;
1005 1019
1006 private: 1020 private:
1007 EndNode* accept_; 1021 EndNode* accept_;
1008 int next_register_; 1022 int next_register_;
1023 int unicode_lookaround_stack_register_;
1024 int unicode_lookaround_position_register_;
1009 List<RegExpNode*>* work_list_; 1025 List<RegExpNode*>* work_list_;
1010 int recursion_depth_; 1026 int recursion_depth_;
1011 RegExpMacroAssembler* macro_assembler_; 1027 RegExpMacroAssembler* macro_assembler_;
1012 bool ignore_case_; 1028 JSRegExp::Flags flags_;
1013 bool one_byte_; 1029 bool one_byte_;
1014 bool reg_exp_too_big_; 1030 bool reg_exp_too_big_;
1015 bool limiting_recursion_; 1031 bool limiting_recursion_;
1016 bool optimize_; 1032 bool optimize_;
1017 bool read_backward_; 1033 bool read_backward_;
1018 int current_expansion_factor_; 1034 int current_expansion_factor_;
1019 FrequencyCollator frequency_collator_; 1035 FrequencyCollator frequency_collator_;
1020 Isolate* isolate_; 1036 Isolate* isolate_;
1021 Zone* zone_; 1037 Zone* zone_;
1022 }; 1038 };
(...skipping 11 matching lines...) Expand all
1034 1050
1035 1051
1036 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) { 1052 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
1037 return RegExpEngine::CompilationResult(isolate, "RegExp too big"); 1053 return RegExpEngine::CompilationResult(isolate, "RegExp too big");
1038 } 1054 }
1039 1055
1040 1056
1041 // Attempts to compile the regexp using an Irregexp code generator. Returns 1057 // Attempts to compile the regexp using an Irregexp code generator. Returns
1042 // a fixed array or a null handle depending on whether it succeeded. 1058 // a fixed array or a null handle depending on whether it succeeded.
1043 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 1059 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
1044 bool ignore_case, bool one_byte) 1060 JSRegExp::Flags flags, bool one_byte)
1045 : next_register_(2 * (capture_count + 1)), 1061 : next_register_(2 * (capture_count + 1)),
1062 unicode_lookaround_stack_register_(kNoRegister),
1063 unicode_lookaround_position_register_(kNoRegister),
1046 work_list_(NULL), 1064 work_list_(NULL),
1047 recursion_depth_(0), 1065 recursion_depth_(0),
1048 ignore_case_(ignore_case), 1066 flags_(flags),
1049 one_byte_(one_byte), 1067 one_byte_(one_byte),
1050 reg_exp_too_big_(false), 1068 reg_exp_too_big_(false),
1051 limiting_recursion_(false), 1069 limiting_recursion_(false),
1052 optimize_(FLAG_regexp_optimization), 1070 optimize_(FLAG_regexp_optimization),
1053 read_backward_(false), 1071 read_backward_(false),
1054 current_expansion_factor_(1), 1072 current_expansion_factor_(1),
1055 frequency_collator_(), 1073 frequency_collator_(),
1056 isolate_(isolate), 1074 isolate_(isolate),
1057 zone_(zone) { 1075 zone_(zone) {
1058 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); 1076 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
(...skipping 1032 matching lines...) Expand 10 before | Expand all | Expand 10 after
2091 flip ? even_label : odd_label); 2109 flip ? even_label : odd_label);
2092 } 2110 }
2093 } 2111 }
2094 2112
2095 2113
2096 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 2114 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2097 RegExpCharacterClass* cc, bool one_byte, 2115 RegExpCharacterClass* cc, bool one_byte,
2098 Label* on_failure, int cp_offset, bool check_offset, 2116 Label* on_failure, int cp_offset, bool check_offset,
2099 bool preloaded, Zone* zone) { 2117 bool preloaded, Zone* zone) {
2100 ZoneList<CharacterRange>* ranges = cc->ranges(zone); 2118 ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2101 if (!CharacterRange::IsCanonical(ranges)) { 2119 CharacterRange::Canonicalize(ranges);
2102 CharacterRange::Canonicalize(ranges);
2103 }
2104 2120
2105 int max_char; 2121 int max_char;
2106 if (one_byte) { 2122 if (one_byte) {
2107 max_char = String::kMaxOneByteCharCode; 2123 max_char = String::kMaxOneByteCharCode;
2108 } else { 2124 } else {
2109 max_char = String::kMaxUtf16CodeUnit; 2125 max_char = String::kMaxUtf16CodeUnit;
2110 } 2126 }
2111 2127
2112 int range_count = ranges->length(); 2128 int range_count = ranges->length();
2113 2129
(...skipping 21 matching lines...) Expand all
2135 if (cc->is_negated()) { 2151 if (cc->is_negated()) {
2136 macro_assembler->GoTo(on_failure); 2152 macro_assembler->GoTo(on_failure);
2137 } else { 2153 } else {
2138 // This is a common case hit by non-anchored expressions. 2154 // This is a common case hit by non-anchored expressions.
2139 if (check_offset) { 2155 if (check_offset) {
2140 macro_assembler->CheckPosition(cp_offset, on_failure); 2156 macro_assembler->CheckPosition(cp_offset, on_failure);
2141 } 2157 }
2142 } 2158 }
2143 return; 2159 return;
2144 } 2160 }
2145 if (last_valid_range == 0 &&
2146 !cc->is_negated() &&
2147 ranges->at(0).IsEverything(max_char)) {
2148 // This is a common case hit by non-anchored expressions.
2149 if (check_offset) {
2150 macro_assembler->CheckPosition(cp_offset, on_failure);
2151 }
2152 return;
2153 }
2154 2161
2155 if (!preloaded) { 2162 if (!preloaded) {
2156 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 2163 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
2157 } 2164 }
2158 2165
2159 if (cc->is_standard(zone) && 2166 if (cc->is_standard(zone) &&
2160 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), 2167 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
2161 on_failure)) { 2168 on_failure)) {
2162 return; 2169 return;
2163 } 2170 }
2164 2171
2165 2172
2166 // A new list with ascending entries. Each entry is a code unit 2173 // A new list with ascending entries. Each entry is a code unit
2167 // where there is a boundary between code units that are part of 2174 // where there is a boundary between code units that are part of
2168 // the class and code units that are not. Normally we insert an 2175 // the class and code units that are not. Normally we insert an
2169 // entry at zero which goes to the failure label, but if there 2176 // entry at zero which goes to the failure label, but if there
2170 // was already one there we fall through for success on that entry. 2177 // was already one there we fall through for success on that entry.
2171 // Subsequent entries have alternating meaning (success/failure). 2178 // Subsequent entries have alternating meaning (success/failure).
(...skipping 619 matching lines...) Expand 10 before | Expand all | Expand 10 after
2791 // Character is outside Latin-1 completely 2798 // Character is outside Latin-1 completely
2792 if (converted == 0) return set_replacement(NULL); 2799 if (converted == 0) return set_replacement(NULL);
2793 // Convert quark to Latin-1 in place. 2800 // Convert quark to Latin-1 in place.
2794 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); 2801 uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2795 copy[j] = converted; 2802 copy[j] = converted;
2796 } 2803 }
2797 } else { 2804 } else {
2798 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); 2805 DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
2799 RegExpCharacterClass* cc = elm.char_class(); 2806 RegExpCharacterClass* cc = elm.char_class();
2800 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 2807 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2801 if (!CharacterRange::IsCanonical(ranges)) { 2808 CharacterRange::Canonicalize(ranges);
2802 CharacterRange::Canonicalize(ranges);
2803 }
2804 // Now they are in order so we only need to look at the first. 2809 // Now they are in order so we only need to look at the first.
2805 int range_count = ranges->length(); 2810 int range_count = ranges->length();
2806 if (cc->is_negated()) { 2811 if (cc->is_negated()) {
2807 if (range_count != 0 && 2812 if (range_count != 0 &&
2808 ranges->at(0).from() == 0 && 2813 ranges->at(0).from() == 0 &&
2809 ranges->at(0).to() >= String::kMaxOneByteCharCode) { 2814 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2810 // This will be handled in a later filter. 2815 // This will be handled in a later filter.
2811 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2816 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2812 return set_replacement(NULL); 2817 return set_replacement(NULL);
2813 } 2818 }
(...skipping 468 matching lines...) Expand 10 before | Expand all | Expand 10 after
3282 bool TextNode::SkipPass(int int_pass, bool ignore_case) { 3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
3283 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); 3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3284 if (ignore_case) { 3289 if (ignore_case) {
3285 return pass == SIMPLE_CHARACTER_MATCH; 3290 return pass == SIMPLE_CHARACTER_MATCH;
3286 } else { 3291 } else {
3287 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; 3292 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3288 } 3293 }
3289 } 3294 }
3290 3295
3291 3296
3297 TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
3298 ZoneList<CharacterRange>* ranges,
3299 bool read_backward,
3300 RegExpNode* on_success) {
3301 DCHECK_NOT_NULL(ranges);
3302 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
3303 elms->Add(
3304 TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)),
3305 zone);
3306 return new (zone) TextNode(elms, read_backward, on_success);
3307 }
3308
3309
3310 TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
3311 CharacterRange trail,
3312 bool read_backward,
3313 RegExpNode* on_success) {
3314 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
3315 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
3316 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
3317 elms->Add(TextElement::CharClass(
3318 new (zone) RegExpCharacterClass(lead_ranges, false)),
3319 zone);
3320 elms->Add(TextElement::CharClass(
3321 new (zone) RegExpCharacterClass(trail_ranges, false)),
3322 zone);
3323 return new (zone) TextNode(elms, read_backward, on_success);
3324 }
3325
3326
3292 // This generates the code to match a text node. A text node can contain 3327 // This generates the code to match a text node. A text node can contain
3293 // straight character sequences (possibly to be matched in a case-independent 3328 // straight character sequences (possibly to be matched in a case-independent
3294 // way) and character classes. For efficiency we do not do this in a single 3329 // way) and character classes. For efficiency we do not do this in a single
3295 // pass from left to right. Instead we pass over the text node several times, 3330 // pass from left to right. Instead we pass over the text node several times,
3296 // emitting code for some character positions every time. See the comment on 3331 // emitting code for some character positions every time. See the comment on
3297 // TextEmitPass for details. 3332 // TextEmitPass for details.
3298 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3333 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3299 LimitResult limit_result = LimitVersions(compiler, trace); 3334 LimitResult limit_result = LimitVersions(compiler, trace);
3300 if (limit_result == DONE) return; 3335 if (limit_result == DONE) return;
3301 DCHECK(limit_result == CONTINUE); 3336 DCHECK(limit_result == CONTINUE);
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
3398 3433
3399 3434
3400 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 3435 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
3401 RegExpCompiler* compiler) { 3436 RegExpCompiler* compiler) {
3402 if (read_backward()) return NULL; 3437 if (read_backward()) return NULL;
3403 if (elements()->length() != 1) return NULL; 3438 if (elements()->length() != 1) return NULL;
3404 TextElement elm = elements()->at(0); 3439 TextElement elm = elements()->at(0);
3405 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 3440 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3406 RegExpCharacterClass* node = elm.char_class(); 3441 RegExpCharacterClass* node = elm.char_class();
3407 ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3442 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3408 if (!CharacterRange::IsCanonical(ranges)) { 3443 CharacterRange::Canonicalize(ranges);
3409 CharacterRange::Canonicalize(ranges);
3410 }
3411 if (node->is_negated()) { 3444 if (node->is_negated()) {
3412 return ranges->length() == 0 ? on_success() : NULL; 3445 return ranges->length() == 0 ? on_success() : NULL;
3413 } 3446 }
3414 if (ranges->length() != 1) return NULL; 3447 if (ranges->length() != 1) return NULL;
3415 uint32_t max_char; 3448 uint32_t max_char;
3416 if (compiler->one_byte()) { 3449 if (compiler->one_byte()) {
3417 max_char = String::kMaxOneByteCharCode; 3450 max_char = String::kMaxOneByteCharCode;
3418 } else { 3451 } else {
3419 max_char = String::kMaxUtf16CodeUnit; 3452 max_char = String::kMaxUtf16CodeUnit;
3420 } 3453 }
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
3547 return alt_gens_[i]; 3580 return alt_gens_[i];
3548 } 3581 }
3549 3582
3550 private: 3583 private:
3551 static const int kAFew = 10; 3584 static const int kAFew = 10;
3552 ZoneList<AlternativeGeneration*> alt_gens_; 3585 ZoneList<AlternativeGeneration*> alt_gens_;
3553 AlternativeGeneration a_few_alt_gens_[kAFew]; 3586 AlternativeGeneration a_few_alt_gens_[kAFew];
3554 }; 3587 };
3555 3588
3556 3589
3590 static const uc32 kLeadSurrogateStart = 0xd800;
3591 static const uc32 kLeadSurrogateEnd = 0xdbff;
3592 static const uc32 kTrailSurrogateStart = 0xdc00;
3593 static const uc32 kTrailSurrogateEnd = 0xdfff;
3594 static const uc32 kNonBmpStart = 0x10000;
3595 static const uc32 kNonBmpEnd = 0x10ffff;
3596 static const uc32 kRangeEndMarker = 0x110000;
3597
3557 // The '2' variant is has inclusive from and exclusive to. 3598 // The '2' variant is has inclusive from and exclusive to.
3558 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, 3599 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
3559 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. 3600 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.
3560 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 3601 static const int kSpaceRanges[] = {
3561 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 3602 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681,
3562 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 3603 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
3563 0xFEFF, 0xFF00, 0x10000 }; 3604 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker};
3564 static const int kSpaceRangeCount = arraysize(kSpaceRanges); 3605 static const int kSpaceRangeCount = arraysize(kSpaceRanges);
3565 3606
3566 static const int kWordRanges[] = { 3607 static const int kWordRanges[] = {
3567 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; 3608 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, kRangeEndMarker};
3568 static const int kWordRangeCount = arraysize(kWordRanges); 3609 static const int kWordRangeCount = arraysize(kWordRanges);
3569 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 }; 3610 static const int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};
3570 static const int kDigitRangeCount = arraysize(kDigitRanges); 3611 static const int kDigitRangeCount = arraysize(kDigitRanges);
3571 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; 3612 static const int kSurrogateRanges[] = {
3613 kLeadSurrogateStart, kLeadSurrogateStart + 1, kRangeEndMarker};
3572 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges); 3614 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
3573 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E, 3615 static const int kLineTerminatorRanges[] = {
3574 0x2028, 0x202A, 0x10000 }; 3616 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker};
3575 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges); 3617 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
3576 3618
3577
3578 void BoyerMoorePositionInfo::Set(int character) { 3619 void BoyerMoorePositionInfo::Set(int character) {
3579 SetInterval(Interval(character, character)); 3620 SetInterval(Interval(character, character));
3580 } 3621 }
3581 3622
3582 3623
3583 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { 3624 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
3584 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); 3625 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
3585 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); 3626 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
3586 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); 3627 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
3587 surrogate_ = 3628 surrogate_ =
(...skipping 1137 matching lines...) Expand 10 before | Expand all | Expand 10 after
4725 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 4766 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
4726 RegExpNode* on_success) { 4767 RegExpNode* on_success) {
4727 return new (compiler->zone()) 4768 return new (compiler->zone())
4728 TextNode(elements(), compiler->read_backward(), on_success); 4769 TextNode(elements(), compiler->read_backward(), on_success);
4729 } 4770 }
4730 4771
4731 4772
4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, 4773 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
4733 const int* special_class, 4774 const int* special_class,
4734 int length) { 4775 int length) {
4735 length--; // Remove final 0x10000. 4776 length--; // Remove final marker.
4736 DCHECK(special_class[length] == 0x10000); 4777 DCHECK(special_class[length] == kRangeEndMarker);
4737 DCHECK(ranges->length() != 0); 4778 DCHECK(ranges->length() != 0);
4738 DCHECK(length != 0); 4779 DCHECK(length != 0);
4739 DCHECK(special_class[0] != 0); 4780 DCHECK(special_class[0] != 0);
4740 if (ranges->length() != (length >> 1) + 1) { 4781 if (ranges->length() != (length >> 1) + 1) {
4741 return false; 4782 return false;
4742 } 4783 }
4743 CharacterRange range = ranges->at(0); 4784 CharacterRange range = ranges->at(0);
4744 if (range.from() != 0) { 4785 if (range.from() != 0) {
4745 return false; 4786 return false;
4746 } 4787 }
4747 for (int i = 0; i < length; i += 2) { 4788 for (int i = 0; i < length; i += 2) {
4748 if (special_class[i] != (range.to() + 1)) { 4789 if (special_class[i] != (range.to() + 1)) {
4749 return false; 4790 return false;
4750 } 4791 }
4751 range = ranges->at((i >> 1) + 1); 4792 range = ranges->at((i >> 1) + 1);
4752 if (special_class[i+1] != range.from()) { 4793 if (special_class[i+1] != range.from()) {
4753 return false; 4794 return false;
4754 } 4795 }
4755 } 4796 }
4756 if (range.to() != 0xffff) { 4797 if (range.to() != 0xffff) {
4757 return false; 4798 return false;
4758 } 4799 }
4759 return true; 4800 return true;
4760 } 4801 }
4761 4802
4762 4803
4763 static bool CompareRanges(ZoneList<CharacterRange>* ranges, 4804 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4764 const int* special_class, 4805 const int* special_class,
4765 int length) { 4806 int length) {
4766 length--; // Remove final 0x10000. 4807 length--; // Remove final marker.
4767 DCHECK(special_class[length] == 0x10000); 4808 DCHECK(special_class[length] == kRangeEndMarker);
4768 if (ranges->length() * 2 != length) { 4809 if (ranges->length() * 2 != length) {
4769 return false; 4810 return false;
4770 } 4811 }
4771 for (int i = 0; i < length; i += 2) { 4812 for (int i = 0; i < length; i += 2) {
4772 CharacterRange range = ranges->at(i >> 1); 4813 CharacterRange range = ranges->at(i >> 1);
4773 if (range.from() != special_class[i] || 4814 if (range.from() != special_class[i] ||
4774 range.to() != special_class[i + 1] - 1) { 4815 range.to() != special_class[i + 1] - 1) {
4775 return false; 4816 return false;
4776 } 4817 }
4777 } 4818 }
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
4813 return true; 4854 return true;
4814 } 4855 }
4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { 4856 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4816 set_.set_standard_set_type('W'); 4857 set_.set_standard_set_type('W');
4817 return true; 4858 return true;
4818 } 4859 }
4819 return false; 4860 return false;
4820 } 4861 }
4821 4862
4822 4863
4864 bool RegExpCharacterClass::NeedsDesugaringForUnicode(Zone* zone) {
4865 ZoneList<CharacterRange>* ranges = this->ranges(zone);
4866 CharacterRange::Canonicalize(ranges);
4867 for (int i = ranges->length() - 1; i >= 0; i--) {
4868 uc32 from = ranges->at(i).from();
4869 uc32 to = ranges->at(i).to();
4870 // Check for non-BMP characters.
4871 if (to >= kNonBmpStart) return true;
4872 // Check for lone surrogates.
4873 if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
4874 }
4875 return false;
4876 }
4877
4878
4879 UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
4880 ZoneList<CharacterRange>* base)
4881 : zone_(zone),
4882 table_(zone),
4883 bmp_(nullptr),
4884 lead_surrogates_(nullptr),
4885 trail_surrogates_(nullptr),
4886 non_bmp_(nullptr) {
4887 // The unicode range splitter categorizes given character ranges into:
4888 // - Code points from the BMP representable by one code unit.
4889 // - Code points outside the BMP that need to be split into surrogate pairs.
4890 // - Lone lead surrogates.
4891 // - Lone trail surrogates.
4892 // Lone surrogates are valid code points, even though no actual characters.
4893 // They require special matching to make sure we do not split surrogate pairs.
4894 // We use the dispatch table to accomplish this. The base range is split up
4895 // by the table by the overlay ranges, and the Call callback is used to
4896 // filter and collect ranges for each category.
4897 for (int i = 0; i < base->length(); i++) {
4898 table_.AddRange(base->at(i), kBase, zone_);
4899 }
4900 // Add overlay ranges.
4901 table_.AddRange(CharacterRange(0, kLeadSurrogateStart - 1), kBmpCodePoints,
4902 zone_);
4903 table_.AddRange(CharacterRange(kLeadSurrogateStart, kLeadSurrogateEnd),
4904 kLeadSurrogates, zone_);
4905 table_.AddRange(CharacterRange(kTrailSurrogateStart, kTrailSurrogateEnd),
4906 kTrailSurrogates, zone_);
4907 table_.AddRange(CharacterRange(kTrailSurrogateEnd, kNonBmpStart - 1),
4908 kBmpCodePoints, zone_);
4909 table_.AddRange(CharacterRange(kNonBmpStart, kNonBmpEnd), kNonBmpCodePoints,
4910 zone_);
4911 table_.ForEach(this);
4912 }
4913
4914
4915 void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
4916 OutSet* outset = entry.out_set();
4917 if (!outset->Get(kBase)) return;
4918 ZoneList<CharacterRange>** target = NULL;
4919 if (outset->Get(kBmpCodePoints)) {
4920 target = &bmp_;
4921 } else if (outset->Get(kLeadSurrogates)) {
4922 target = &lead_surrogates_;
4923 } else if (outset->Get(kTrailSurrogates)) {
4924 target = &trail_surrogates_;
4925 } else {
4926 DCHECK(outset->Get(kNonBmpCodePoints));
4927 target = &non_bmp_;
4928 }
4929 if (*target == NULL) *target = new (zone_) ZoneList<CharacterRange>(2, zone_);
4930 (*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);
4931 }
4932
4933
4934 void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
4935 RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
4936 ZoneList<CharacterRange>* bmp = splitter->bmp();
4937 if (bmp == nullptr) return;
4938 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
4939 compiler->zone(), bmp, compiler->read_backward(), on_success)));
4940 }
4941
4942
4943 void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
4944 RegExpNode* on_success,
4945 UnicodeRangeSplitter* splitter) {
4946 ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
4947 if (non_bmp == nullptr) return;
4948 DCHECK(compiler->unicode());
4949 DCHECK(!compiler->one_byte());
4950 Zone* zone = compiler->zone();
4951 CharacterRange::Canonicalize(non_bmp);
4952 for (int i = 0; i < non_bmp->length(); i++) {
4953 // Match surrogate pair.
4954 // E.g. [\u10005-\u11005] becomes
4955 // \ud800[\udc05-\udfff]|
4956 // [\ud801-\ud803][\udc00-\udfff]|
4957 // \ud804[\udc00-\udc05]
4958 uc32 from = non_bmp->at(i).from();
4959 uc32 to = non_bmp->at(i).to();
4960 uc16 from_l = unibrow::Utf16::LeadSurrogate(from);
4961 uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
4962 uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
4963 uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
4964 if (from_l == to_l) {
4965 // The lead surrogate is the same.
4966 result->AddAlternative(
4967 GuardedAlternative(TextNode::CreateForSurrogatePair(
4968 zone, CharacterRange::Singleton(from_l),
4969 CharacterRange::Range(from_t, to_t), compiler->read_backward(),
4970 on_success)));
4971 } else {
4972 if (from_t != kTrailSurrogateStart) {
4973 // Add [from_l][from_t-\udfff]
4974 result->AddAlternative(
4975 GuardedAlternative(TextNode::CreateForSurrogatePair(
4976 zone, CharacterRange::Singleton(from_l),
4977 CharacterRange::Range(from_t, kTrailSurrogateEnd),
4978 compiler->read_backward(), on_success)));
4979 from_l++;
4980 }
4981 if (to_t != kTrailSurrogateEnd) {
4982 // Add [to_l][\udc00-to_t]
4983 result->AddAlternative(
4984 GuardedAlternative(TextNode::CreateForSurrogatePair(
4985 zone, CharacterRange::Singleton(to_l),
4986 CharacterRange::Range(kTrailSurrogateStart, to_t),
4987 compiler->read_backward(), on_success)));
4988 to_l--;
4989 }
4990 if (from_l <= to_l) {
4991 // Add [from_l-to_l][\udc00-\udfff]
4992 result->AddAlternative(
4993 GuardedAlternative(TextNode::CreateForSurrogatePair(
4994 zone, CharacterRange::Range(from_l, to_l),
4995 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
4996 compiler->read_backward(), on_success)));
4997 }
4998 }
4999 }
5000 }
5001
5002
5003 RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
5004 RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
5005 ZoneList<CharacterRange>* match, RegExpNode* on_success,
5006 bool read_backward) {
5007 Zone* zone = compiler->zone();
5008 RegExpNode* match_node = TextNode::CreateForCharacterRanges(
5009 zone, match, read_backward, on_success);
5010 int stack_register = compiler->UnicodeLookaroundStackRegister();
5011 int position_register = compiler->UnicodeLookaroundPositionRegister();
5012 RegExpLookaround::Builder lookaround(false, match_node, stack_register,
5013 position_register);
5014 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
5015 zone, lookbehind, !read_backward, lookaround.on_match_success());
5016 return lookaround.ForMatch(negative_match);
5017 }
5018
5019
5020 RegExpNode* MatchAndNegativeLookaroundInReadDirection(
5021 RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
5022 ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
5023 bool read_backward) {
5024 Zone* zone = compiler->zone();
5025 int stack_register = compiler->UnicodeLookaroundStackRegister();
5026 int position_register = compiler->UnicodeLookaroundPositionRegister();
5027 RegExpLookaround::Builder lookaround(false, on_success, stack_register,
5028 position_register);
5029 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
5030 zone, lookahead, read_backward, lookaround.on_match_success());
5031 return TextNode::CreateForCharacterRanges(
5032 zone, match, read_backward, lookaround.ForMatch(negative_match));
5033 }
5034
5035
5036 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
5037 RegExpNode* on_success,
5038 UnicodeRangeSplitter* splitter) {
5039 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();
5040 if (lead_surrogates == nullptr) return;
5041 Zone* zone = compiler->zone();
5042 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
5043 ZoneList<CharacterRange>* trail_surrogates =
5044 new (zone) ZoneList<CharacterRange>(1, zone);
5045 trail_surrogates->Add(
5046 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone);
5047
5048 RegExpNode* match =
5049 compiler->read_backward()
5050 // Reading backward. Assert that reading forward, there is no trail
5051 // surrogate, and then backward match the lead surrogate.
5052 ? NegativeLookaroundAgainstReadDirectionAndMatch(
5053 compiler, trail_surrogates, lead_surrogates, on_success, true)
5054 // Reading forward. Forwrad match the lead surrogate and assert that
5055 // no
5056 // trail surrogate follows.
5057 : MatchAndNegativeLookaroundInReadDirection(
5058 compiler, lead_surrogates, trail_surrogates, on_success, false);
5059 result->AddAlternative(GuardedAlternative(match));
5060 }
5061
5062
5063 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
5064 RegExpNode* on_success,
5065 UnicodeRangeSplitter* splitter) {
5066 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();
5067 if (trail_surrogates == nullptr) return;
5068 Zone* zone = compiler->zone();
5069 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
5070 ZoneList<CharacterRange>* lead_surrogates =
5071 new (zone) ZoneList<CharacterRange>(1, zone);
5072 lead_surrogates->Add(
5073 CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone);
5074
5075 RegExpNode* match =
5076 compiler->read_backward()
5077 // Reading backward. Backward match the trail surrogate and assert
5078 // that no lead surrogate precedes it.
5079 ? MatchAndNegativeLookaroundInReadDirection(
5080 compiler, trail_surrogates, lead_surrogates, on_success, true)
5081 // Reading forward. Assert that reading backward, there is no lead
5082 // surrogate, and then forward match the trail surrogate.
5083 : NegativeLookaroundAgainstReadDirectionAndMatch(
5084 compiler, lead_surrogates, trail_surrogates, on_success, false);
5085 result->AddAlternative(GuardedAlternative(match));
5086 }
5087
5088
4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 5089 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
4824 RegExpNode* on_success) { 5090 RegExpNode* on_success) {
4825 return new (compiler->zone()) 5091 set_.Canonicalize();
4826 TextNode(this, compiler->read_backward(), on_success); 5092 Zone* zone = compiler->zone();
4827 } 5093 ZoneList<CharacterRange>* ranges = this->ranges(zone);
4828 5094 if (compiler->unicode() && !compiler->one_byte()) {
4829 5095 if (is_negated()) {
5096 ZoneList<CharacterRange>* negated =
5097 new (zone) ZoneList<CharacterRange>(2, zone);
5098 CharacterRange::Negate(ranges, negated, zone);
5099 ranges = negated;
5100 }
5101 if (ranges->length() == 0) {
5102 // No matches possible.
5103 return new (zone) EndNode(EndNode::BACKTRACK, zone);
5104 }
5105 UnicodeRangeSplitter splitter(zone, ranges);
5106 ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone());
5107 AddBmpCharacters(compiler, result, on_success, &splitter);
5108 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
5109 AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
5110 AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
5111 return result;
5112 } else {
5113 return new (zone) TextNode(this, compiler->read_backward(), on_success);
5114 }
5115 }
5116
5117
4830 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { 5118 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
4831 RegExpAtom* atom1 = (*a)->AsAtom(); 5119 RegExpAtom* atom1 = (*a)->AsAtom();
4832 RegExpAtom* atom2 = (*b)->AsAtom(); 5120 RegExpAtom* atom2 = (*b)->AsAtom();
4833 uc16 character1 = atom1->data().at(0); 5121 uc16 character1 = atom1->data().at(0);
4834 uc16 character2 = atom2->data().at(0); 5122 uc16 character2 = atom2->data().at(0);
4835 if (character1 < character2) return -1; 5123 if (character1 < character2) return -1;
4836 if (character1 > character2) return 1; 5124 if (character1 > character2) return 1;
4837 return 0; 5125 return 0;
4838 } 5126 }
4839 5127
(...skipping 491 matching lines...) Expand 10 before | Expand all | Expand 10 after
5331 compiler->read_backward(), on_success); 5619 compiler->read_backward(), on_success);
5332 } 5620 }
5333 5621
5334 5622
5335 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 5623 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
5336 RegExpNode* on_success) { 5624 RegExpNode* on_success) {
5337 return on_success; 5625 return on_success;
5338 } 5626 }
5339 5627
5340 5628
5629 RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
5630 int stack_pointer_register,
5631 int position_register,
5632 int capture_register_count,
5633 int capture_register_start)
5634 : is_positive_(is_positive),
5635 on_success_(on_success),
5636 stack_pointer_register_(stack_pointer_register),
5637 position_register_(position_register) {
5638 if (is_positive_) {
5639 on_match_success_ = ActionNode::PositiveSubmatchSuccess(
5640 stack_pointer_register, position_register, capture_register_count,
5641 capture_register_start, on_success_);
5642 } else {
5643 Zone* zone = on_success_->zone();
5644 on_match_success_ = new (zone) NegativeSubmatchSuccess(
5645 stack_pointer_register, position_register, capture_register_count,
5646 capture_register_start, zone);
5647 }
5648 }
5649
5650
5651 RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
5652 if (is_positive_) {
5653 return ActionNode::BeginSubmatch(stack_pointer_register_,
5654 position_register_, match);
5655 } else {
5656 Zone* zone = on_success_->zone();
5657 // We use a ChoiceNode to represent the negative lookaround. The first
5658 // alternative is the negative match. On success, the end node backtracks.
5659 // On failure, the second alternative is tried and leads to success.
5660 // NegativeLookaheadChoiceNode is a special ChoiceNode that ignores the
5661 // first exit when calculating quick checks.
5662 ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
5663 GuardedAlternative(match), GuardedAlternative(on_success_), zone);
5664 return ActionNode::BeginSubmatch(stack_pointer_register_,
5665 position_register_, choice_node);
5666 }
5667 }
5668
5669
5341 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, 5670 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
5342 RegExpNode* on_success) { 5671 RegExpNode* on_success) {
5343 int stack_pointer_register = compiler->AllocateRegister(); 5672 int stack_pointer_register = compiler->AllocateRegister();
5344 int position_register = compiler->AllocateRegister(); 5673 int position_register = compiler->AllocateRegister();
5345 5674
5346 const int registers_per_capture = 2; 5675 const int registers_per_capture = 2;
5347 const int register_of_first_capture = 2; 5676 const int register_of_first_capture = 2;
5348 int register_count = capture_count_ * registers_per_capture; 5677 int register_count = capture_count_ * registers_per_capture;
5349 int register_start = 5678 int register_start =
5350 register_of_first_capture + capture_from_ * registers_per_capture; 5679 register_of_first_capture + capture_from_ * registers_per_capture;
5351 5680
5352 RegExpNode* result; 5681 RegExpNode* result;
5353 bool was_reading_backward = compiler->read_backward(); 5682 bool was_reading_backward = compiler->read_backward();
5354 compiler->set_read_backward(type() == LOOKBEHIND); 5683 compiler->set_read_backward(type() == LOOKBEHIND);
5355 if (is_positive()) { 5684 Builder builder(is_positive(), on_success, stack_pointer_register,
5356 result = ActionNode::BeginSubmatch( 5685 position_register, register_count, register_start);
5357 stack_pointer_register, position_register, 5686 RegExpNode* match = body_->ToNode(compiler, builder.on_match_success());
5358 body()->ToNode(compiler, 5687 result = builder.ForMatch(match);
5359 ActionNode::PositiveSubmatchSuccess(
5360 stack_pointer_register, position_register,
5361 register_count, register_start, on_success)));
5362 } else {
5363 // We use a ChoiceNode for a negative lookahead because it has most of
5364 // the characteristics we need. It has the body of the lookahead as its
5365 // first alternative and the expression after the lookahead of the second
5366 // alternative. If the first alternative succeeds then the
5367 // NegativeSubmatchSuccess will unwind the stack including everything the
5368 // choice node set up and backtrack. If the first alternative fails then
5369 // the second alternative is tried, which is exactly the desired result
5370 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
5371 // ChoiceNode that knows to ignore the first exit when calculating quick
5372 // checks.
5373 Zone* zone = compiler->zone();
5374
5375 GuardedAlternative body_alt(
5376 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(
5377 stack_pointer_register, position_register,
5378 register_count, register_start, zone)));
5379 ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
5380 body_alt, GuardedAlternative(on_success), zone);
5381 result = ActionNode::BeginSubmatch(stack_pointer_register,
5382 position_register, choice_node);
5383 }
5384 compiler->set_read_backward(was_reading_backward); 5688 compiler->set_read_backward(was_reading_backward);
5385 return result; 5689 return result;
5386 } 5690 }
5387 5691
5388 5692
5389 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 5693 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
5390 RegExpNode* on_success) { 5694 RegExpNode* on_success) {
5391 return ToNode(body(), index(), compiler, on_success); 5695 return ToNode(body(), index(), compiler, on_success);
5392 } 5696 }
5393 5697
(...skipping 27 matching lines...) Expand all
5421 } 5725 }
5422 return current; 5726 return current;
5423 } 5727 }
5424 5728
5425 5729
5426 static void AddClass(const int* elmv, 5730 static void AddClass(const int* elmv,
5427 int elmc, 5731 int elmc,
5428 ZoneList<CharacterRange>* ranges, 5732 ZoneList<CharacterRange>* ranges,
5429 Zone* zone) { 5733 Zone* zone) {
5430 elmc--; 5734 elmc--;
5431 DCHECK(elmv[elmc] == 0x10000); 5735 DCHECK(elmv[elmc] == kRangeEndMarker);
5432 for (int i = 0; i < elmc; i += 2) { 5736 for (int i = 0; i < elmc; i += 2) {
5433 DCHECK(elmv[i] < elmv[i + 1]); 5737 DCHECK(elmv[i] < elmv[i + 1]);
5434 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone); 5738 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);
5435 } 5739 }
5436 } 5740 }
5437 5741
5438 5742
5439 static void AddClassNegated(const int *elmv, 5743 static void AddClassNegated(const int *elmv,
5440 int elmc, 5744 int elmc,
5441 ZoneList<CharacterRange>* ranges, 5745 ZoneList<CharacterRange>* ranges,
5442 Zone* zone) { 5746 Zone* zone) {
5443 elmc--; 5747 elmc--;
5444 DCHECK(elmv[elmc] == 0x10000); 5748 DCHECK(elmv[elmc] == kRangeEndMarker);
5445 DCHECK(elmv[0] != 0x0000); 5749 DCHECK(elmv[0] != 0x0000);
5446 DCHECK(elmv[elmc-1] != String::kMaxUtf16CodeUnit); 5750 DCHECK(elmv[elmc - 1] != String::kMaxCodePoint);
5447 uc16 last = 0x0000; 5751 uc16 last = 0x0000;
5448 for (int i = 0; i < elmc; i += 2) { 5752 for (int i = 0; i < elmc; i += 2) {
5449 DCHECK(last <= elmv[i] - 1); 5753 DCHECK(last <= elmv[i] - 1);
5450 DCHECK(elmv[i] < elmv[i + 1]); 5754 DCHECK(elmv[i] < elmv[i + 1]);
5451 ranges->Add(CharacterRange(last, elmv[i] - 1), zone); 5755 ranges->Add(CharacterRange(last, elmv[i] - 1), zone);
5452 last = elmv[i + 1]; 5756 last = elmv[i + 1];
5453 } 5757 }
5454 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone); 5758 ranges->Add(CharacterRange(last, String::kMaxCodePoint), zone);
5455 } 5759 }
5456 5760
5457 5761
5458 void CharacterRange::AddClassEscape(uc16 type, 5762 void CharacterRange::AddClassEscape(uc16 type,
5459 ZoneList<CharacterRange>* ranges, 5763 ZoneList<CharacterRange>* ranges,
5460 Zone* zone) { 5764 Zone* zone) {
5461 switch (type) { 5765 switch (type) {
5462 case 's': 5766 case 's':
5463 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone); 5767 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5464 break; 5768 break;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5501 UNREACHABLE(); 5805 UNREACHABLE();
5502 } 5806 }
5503 } 5807 }
5504 5808
5505 5809
5506 Vector<const int> CharacterRange::GetWordBounds() { 5810 Vector<const int> CharacterRange::GetWordBounds() {
5507 return Vector<const int>(kWordRanges, kWordRangeCount - 1); 5811 return Vector<const int>(kWordRanges, kWordRangeCount - 1);
5508 } 5812 }
5509 5813
5510 5814
5511 class CharacterRangeSplitter {
5512 public:
5513 CharacterRangeSplitter(ZoneList<CharacterRange>** included,
5514 ZoneList<CharacterRange>** excluded,
5515 Zone* zone)
5516 : included_(included),
5517 excluded_(excluded),
5518 zone_(zone) { }
5519 void Call(uc16 from, DispatchTable::Entry entry);
5520
5521 static const int kInBase = 0;
5522 static const int kInOverlay = 1;
5523
5524 private:
5525 ZoneList<CharacterRange>** included_;
5526 ZoneList<CharacterRange>** excluded_;
5527 Zone* zone_;
5528 };
5529
5530
5531 void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
5532 if (!entry.out_set()->Get(kInBase)) return;
5533 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
5534 ? included_
5535 : excluded_;
5536 if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_);
5537 (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);
5538 }
5539
5540
5541 void CharacterRange::Split(ZoneList<CharacterRange>* base,
5542 Vector<const int> overlay,
5543 ZoneList<CharacterRange>** included,
5544 ZoneList<CharacterRange>** excluded,
5545 Zone* zone) {
5546 DCHECK_NULL(*included);
5547 DCHECK_NULL(*excluded);
5548 DispatchTable table(zone);
5549 for (int i = 0; i < base->length(); i++)
5550 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);
5551 for (int i = 0; i < overlay.length(); i += 2) {
5552 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
5553 CharacterRangeSplitter::kInOverlay, zone);
5554 }
5555 CharacterRangeSplitter callback(included, excluded, zone);
5556 table.ForEach(&callback);
5557 }
5558
5559
5560 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone, 5815 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
5561 ZoneList<CharacterRange>* ranges, 5816 ZoneList<CharacterRange>* ranges,
5562 bool is_one_byte) { 5817 bool is_one_byte) {
5563 uc16 bottom = from(); 5818 uc32 bottom = from();
5564 uc16 top = to(); 5819 uc32 top = to();
5820 // Nothing to be done for surrogates.
5821 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
5565 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { 5822 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
5566 if (bottom > String::kMaxOneByteCharCode) return; 5823 if (bottom > String::kMaxOneByteCharCode) return;
5567 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; 5824 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5568 } 5825 }
5569 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5826 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5570 if (top == bottom) { 5827 if (top == bottom) {
5571 // If this is a singleton we just expand the one character. 5828 // If this is a singleton we just expand the one character.
5572 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); 5829 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5573 for (int i = 0; i < length; i++) { 5830 for (int i = 0; i < length; i++) {
5574 uc32 chr = chars[i]; 5831 uc32 chr = chars[i];
(...skipping 17 matching lines...) Expand all
5592 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only 5849 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
5593 // add a range if it is not already contained in the input, so [c-f] 5850 // add a range if it is not already contained in the input, so [c-f]
5594 // will be skipped but [C-F] will be added. If this range is not 5851 // will be skipped but [C-F] will be added. If this range is not
5595 // completely contained in a block we do this for all the blocks 5852 // completely contained in a block we do this for all the blocks
5596 // covered by the range (handling characters that is not in a block 5853 // covered by the range (handling characters that is not in a block
5597 // as a "singleton block"). 5854 // as a "singleton block").
5598 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5855 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5599 int pos = bottom; 5856 int pos = bottom;
5600 while (pos <= top) { 5857 while (pos <= top) {
5601 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range); 5858 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
5602 uc16 block_end; 5859 uc32 block_end;
5603 if (length == 0) { 5860 if (length == 0) {
5604 block_end = pos; 5861 block_end = pos;
5605 } else { 5862 } else {
5606 DCHECK_EQ(1, length); 5863 DCHECK_EQ(1, length);
5607 block_end = range[0]; 5864 block_end = range[0];
5608 } 5865 }
5609 int end = (block_end > top) ? top : block_end; 5866 int end = (block_end > top) ? top : block_end;
5610 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range); 5867 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
5611 for (int i = 0; i < length; i++) { 5868 for (int i = 0; i < length; i++) {
5612 uc32 c = range[i]; 5869 uc32 c = range[i];
5613 uc16 range_from = c - (block_end - pos); 5870 uc32 range_from = c - (block_end - pos);
5614 uc16 range_to = c - (block_end - end); 5871 uc32 range_to = c - (block_end - end);
5615 if (!(bottom <= range_from && range_to <= top)) { 5872 if (!(bottom <= range_from && range_to <= top)) {
5616 ranges->Add(CharacterRange(range_from, range_to), zone); 5873 ranges->Add(CharacterRange(range_from, range_to), zone);
5617 } 5874 }
5618 } 5875 }
5619 pos = end + 1; 5876 pos = end + 1;
5620 } 5877 }
5621 } 5878 }
5622 } 5879 }
5623 5880
5624 5881
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
5665 5922
5666 5923
5667 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, 5924 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
5668 int count, 5925 int count,
5669 CharacterRange insert) { 5926 CharacterRange insert) {
5670 // Inserts a range into list[0..count[, which must be sorted 5927 // Inserts a range into list[0..count[, which must be sorted
5671 // by from value and non-overlapping and non-adjacent, using at most 5928 // by from value and non-overlapping and non-adjacent, using at most
5672 // list[0..count] for the result. Returns the number of resulting 5929 // list[0..count] for the result. Returns the number of resulting
5673 // canonicalized ranges. Inserting a range may collapse existing ranges into 5930 // canonicalized ranges. Inserting a range may collapse existing ranges into
5674 // fewer ranges, so the return value can be anything in the range 1..count+1. 5931 // fewer ranges, so the return value can be anything in the range 1..count+1.
5675 uc16 from = insert.from(); 5932 uc32 from = insert.from();
5676 uc16 to = insert.to(); 5933 uc32 to = insert.to();
5677 int start_pos = 0; 5934 int start_pos = 0;
5678 int end_pos = count; 5935 int end_pos = count;
5679 for (int i = count - 1; i >= 0; i--) { 5936 for (int i = count - 1; i >= 0; i--) {
5680 CharacterRange current = list->at(i); 5937 CharacterRange current = list->at(i);
5681 if (current.from() > to + 1) { 5938 if (current.from() > to + 1) {
5682 end_pos = i; 5939 end_pos = i;
5683 } else if (current.to() + 1 < from) { 5940 } else if (current.to() + 1 < from) {
5684 start_pos = i + 1; 5941 start_pos = i + 1;
5685 break; 5942 break;
5686 } 5943 }
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
5766 DCHECK(CharacterRange::IsCanonical(character_ranges)); 6023 DCHECK(CharacterRange::IsCanonical(character_ranges));
5767 } 6024 }
5768 6025
5769 6026
5770 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, 6027 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
5771 ZoneList<CharacterRange>* negated_ranges, 6028 ZoneList<CharacterRange>* negated_ranges,
5772 Zone* zone) { 6029 Zone* zone) {
5773 DCHECK(CharacterRange::IsCanonical(ranges)); 6030 DCHECK(CharacterRange::IsCanonical(ranges));
5774 DCHECK_EQ(0, negated_ranges->length()); 6031 DCHECK_EQ(0, negated_ranges->length());
5775 int range_count = ranges->length(); 6032 int range_count = ranges->length();
5776 uc16 from = 0; 6033 uc32 from = 0;
5777 int i = 0; 6034 int i = 0;
5778 if (range_count > 0 && ranges->at(0).from() == 0) { 6035 if (range_count > 0 && ranges->at(0).from() == 0) {
5779 from = ranges->at(0).to(); 6036 from = ranges->at(0).to();
5780 i = 1; 6037 i = 1;
5781 } 6038 }
5782 while (i < range_count) { 6039 while (i < range_count) {
5783 CharacterRange range = ranges->at(i); 6040 CharacterRange range = ranges->at(i);
5784 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone); 6041 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);
5785 from = range.to(); 6042 from = range.to();
5786 i++; 6043 i++;
5787 } 6044 }
5788 if (from < String::kMaxUtf16CodeUnit) { 6045 if (from < String::kMaxCodePoint) {
5789 negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit), 6046 negated_ranges->Add(CharacterRange(from + 1, String::kMaxCodePoint), zone);
5790 zone);
5791 } 6047 }
5792 } 6048 }
5793 6049
5794 6050
5795 // ------------------------------------------------------------------- 6051 // -------------------------------------------------------------------
5796 // Splay tree 6052 // Splay tree
5797 6053
5798 6054
5799 OutSet* OutSet::Extend(unsigned value, Zone* zone) { 6055 OutSet* OutSet::Extend(unsigned value, Zone* zone) {
5800 if (Get(value)) 6056 if (Get(value))
(...skipping 30 matching lines...) Expand all
5831 if (value < kFirstLimit) { 6087 if (value < kFirstLimit) {
5832 return (first_ & (1 << value)) != 0; 6088 return (first_ & (1 << value)) != 0;
5833 } else if (remaining_ == NULL) { 6089 } else if (remaining_ == NULL) {
5834 return false; 6090 return false;
5835 } else { 6091 } else {
5836 return remaining_->Contains(value); 6092 return remaining_->Contains(value);
5837 } 6093 }
5838 } 6094 }
5839 6095
5840 6096
5841 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar; 6097 const uc32 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
5842 6098
5843 6099
5844 void DispatchTable::AddRange(CharacterRange full_range, int value, 6100 void DispatchTable::AddRange(CharacterRange full_range, int value,
5845 Zone* zone) { 6101 Zone* zone) {
5846 CharacterRange current = full_range; 6102 CharacterRange current = full_range;
5847 if (tree()->is_empty()) { 6103 if (tree()->is_empty()) {
5848 // If this is the first range we just insert into the table. 6104 // If this is the first range we just insert into the table.
5849 ZoneSplayTree<Config>::Locator loc; 6105 ZoneSplayTree<Config>::Locator loc;
5850 bool inserted = tree()->Insert(current.from(), &loc); 6106 bool inserted = tree()->Insert(current.from(), &loc);
5851 DCHECK(inserted); 6107 DCHECK(inserted);
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
5933 USE(inserted); 6189 USE(inserted);
5934 ins.set_value(Entry(current.from(), 6190 ins.set_value(Entry(current.from(),
5935 current.to(), 6191 current.to(),
5936 empty()->Extend(value, zone))); 6192 empty()->Extend(value, zone)));
5937 break; 6193 break;
5938 } 6194 }
5939 } 6195 }
5940 } 6196 }
5941 6197
5942 6198
5943 OutSet* DispatchTable::Get(uc16 value) { 6199 OutSet* DispatchTable::Get(uc32 value) {
5944 ZoneSplayTree<Config>::Locator loc; 6200 ZoneSplayTree<Config>::Locator loc;
5945 if (!tree()->FindGreatestLessThan(value, &loc)) 6201 if (!tree()->FindGreatestLessThan(value, &loc))
5946 return empty(); 6202 return empty();
5947 Entry* entry = &loc.value(); 6203 Entry* entry = &loc.value();
5948 if (value <= entry->to()) 6204 if (value <= entry->to())
5949 return entry->out_set(); 6205 return entry->out_set();
5950 else 6206 else
5951 return empty(); 6207 return empty();
5952 } 6208 }
5953 6209
(...skipping 297 matching lines...) Expand 10 before | Expand all | Expand 10 after
6251 } 6507 }
6252 6508
6253 6509
6254 void DispatchTableConstructor::VisitAction(ActionNode* that) { 6510 void DispatchTableConstructor::VisitAction(ActionNode* that) {
6255 RegExpNode* target = that->on_success(); 6511 RegExpNode* target = that->on_success();
6256 target->Accept(this); 6512 target->Accept(this);
6257 } 6513 }
6258 6514
6259 6515
6260 RegExpEngine::CompilationResult RegExpEngine::Compile( 6516 RegExpEngine::CompilationResult RegExpEngine::Compile(
6261 Isolate* isolate, Zone* zone, RegExpCompileData* data, bool ignore_case, 6517 Isolate* isolate, Zone* zone, RegExpCompileData* data,
6262 bool is_global, bool is_multiline, bool is_sticky, Handle<String> pattern, 6518 JSRegExp::Flags flags, Handle<String> pattern,
6263 Handle<String> sample_subject, bool is_one_byte) { 6519 Handle<String> sample_subject, bool is_one_byte) {
6264 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 6520 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6265 return IrregexpRegExpTooBig(isolate); 6521 return IrregexpRegExpTooBig(isolate);
6266 } 6522 }
6267 RegExpCompiler compiler(isolate, zone, data->capture_count, ignore_case, 6523 bool ignore_case = flags & JSRegExp::kIgnoreCase;
6524 bool is_sticky = flags & JSRegExp::kSticky;
6525 bool is_global = flags & JSRegExp::kGlobal;
6526 RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
6268 is_one_byte); 6527 is_one_byte);
6269 6528
6270 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); 6529 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
6271 6530
6272 // Sample some characters from the middle of the string. 6531 // Sample some characters from the middle of the string.
6273 static const int kSampleSize = 128; 6532 static const int kSampleSize = 128;
6274 6533
6275 sample_subject = String::Flatten(sample_subject); 6534 sample_subject = String::Flatten(sample_subject);
6276 int chars_sampled = 0; 6535 int chars_sampled = 0;
6277 int half_way = (sample_subject->length() - kSampleSize) / 2; 6536 int half_way = (sample_subject->length() - kSampleSize) / 2;
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after
6500 6759
6501 6760
6502 void RegExpResultsCache::Clear(FixedArray* cache) { 6761 void RegExpResultsCache::Clear(FixedArray* cache) {
6503 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6762 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6504 cache->set(i, Smi::FromInt(0)); 6763 cache->set(i, Smi::FromInt(0));
6505 } 6764 }
6506 } 6765 }
6507 6766
6508 } // namespace internal 6767 } // namespace internal
6509 } // namespace v8 6768 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698