Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1618753002: Revert of [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-ast.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast/ast.h" 7 #include "src/ast/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
65 Handle<String> error_text) { 65 Handle<String> error_text) {
66 USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text)); 66 USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text));
67 } 67 }
68 68
69 69
70 ContainedInLattice AddRange(ContainedInLattice containment, 70 ContainedInLattice AddRange(ContainedInLattice containment,
71 const int* ranges, 71 const int* ranges,
72 int ranges_length, 72 int ranges_length,
73 Interval new_range) { 73 Interval new_range) {
74 DCHECK((ranges_length & 1) == 1); 74 DCHECK((ranges_length & 1) == 1);
75 DCHECK(ranges[ranges_length - 1] == String::kMaxCodePoint + 1); 75 DCHECK(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);
76 if (containment == kLatticeUnknown) return containment; 76 if (containment == kLatticeUnknown) return containment;
77 bool inside = false; 77 bool inside = false;
78 int last = 0; 78 int last = 0;
79 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) { 79 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
80 // Consider the range from last to ranges[i]. 80 // Consider the range from last to ranges[i].
81 // We haven't got to the new range yet. 81 // We haven't got to the new range yet.
82 if (ranges[i] <= new_range.from()) continue; 82 if (ranges[i] <= new_range.from()) continue;
83 // New range is wholly inside last-ranges[i]. Note that new_range.to() is 83 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
84 // inclusive, but the values in ranges are not. 84 // inclusive, but the values in ranges are not.
85 if (last <= new_range.from() && new_range.to() < ranges[i]) { 85 if (last <= new_range.from() && new_range.to() < ranges[i]) {
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 138
139 Handle<Object> result; 139 Handle<Object> result;
140 if (in_cache) { 140 if (in_cache) {
141 re->set_data(*cached); 141 re->set_data(*cached);
142 return re; 142 return re;
143 } 143 }
144 pattern = String::Flatten(pattern); 144 pattern = String::Flatten(pattern);
145 PostponeInterruptsScope postpone(isolate); 145 PostponeInterruptsScope postpone(isolate);
146 RegExpCompileData parse_result; 146 RegExpCompileData parse_result;
147 FlatStringReader reader(isolate, pattern); 147 FlatStringReader reader(isolate, pattern);
148 if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader, flags, 148 if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader,
149 &parse_result)) { 149 flags & JSRegExp::kMultiline,
150 flags & JSRegExp::kUnicode, &parse_result)) {
150 // Throw an exception if we fail to parse the pattern. 151 // Throw an exception if we fail to parse the pattern.
151 return ThrowRegExpException(re, pattern, parse_result.error); 152 return ThrowRegExpException(re, pattern, parse_result.error);
152 } 153 }
153 154
154 bool has_been_compiled = false; 155 bool has_been_compiled = false;
155 156
156 if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) && 157 if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&
157 !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) { 158 !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {
158 // Parse-tree is a single atom that is equal to the pattern. 159 // Parse-tree is a single atom that is equal to the pattern.
159 AtomCompile(re, pattern, flags, pattern); 160 AtomCompile(re, pattern, flags, pattern);
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
363 ThrowRegExpException(re, error_message); 364 ThrowRegExpException(re, error_message);
364 return false; 365 return false;
365 } 366 }
366 367
367 JSRegExp::Flags flags = re->GetFlags(); 368 JSRegExp::Flags flags = re->GetFlags();
368 369
369 Handle<String> pattern(re->Pattern()); 370 Handle<String> pattern(re->Pattern());
370 pattern = String::Flatten(pattern); 371 pattern = String::Flatten(pattern);
371 RegExpCompileData compile_data; 372 RegExpCompileData compile_data;
372 FlatStringReader reader(isolate, pattern); 373 FlatStringReader reader(isolate, pattern);
373 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, 374 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader,
374 &compile_data)) { 375 flags & JSRegExp::kMultiline,
376 flags & JSRegExp::kUnicode, &compile_data)) {
375 // Throw an exception if we fail to parse the pattern. 377 // Throw an exception if we fail to parse the pattern.
376 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 378 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
377 USE(ThrowRegExpException(re, pattern, compile_data.error)); 379 USE(ThrowRegExpException(re, pattern, compile_data.error));
378 return false; 380 return false;
379 } 381 }
380 RegExpEngine::CompilationResult result = 382 RegExpEngine::CompilationResult result = RegExpEngine::Compile(
381 RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern, 383 isolate, &zone, &compile_data, flags & JSRegExp::kIgnoreCase,
382 sample_subject, is_one_byte); 384 flags & JSRegExp::kGlobal, flags & JSRegExp::kMultiline,
385 flags & JSRegExp::kSticky, pattern, sample_subject, is_one_byte);
383 if (result.error_message != NULL) { 386 if (result.error_message != NULL) {
384 // Unable to compile regexp. 387 // Unable to compile regexp.
385 Handle<String> error_message = isolate->factory()->NewStringFromUtf8( 388 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
386 CStrVector(result.error_message)).ToHandleChecked(); 389 CStrVector(result.error_message)).ToHandleChecked();
387 ThrowRegExpException(re, error_message); 390 ThrowRegExpException(re, error_message);
388 return false; 391 return false;
389 } 392 }
390 393
391 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); 394 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
392 data->set(JSRegExp::code_index(is_one_byte), result.code); 395 data->set(JSRegExp::code_index(is_one_byte), result.code);
(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after
935 938
936 private: 939 private:
937 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 940 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
938 int total_samples_; 941 int total_samples_;
939 }; 942 };
940 943
941 944
942 class RegExpCompiler { 945 class RegExpCompiler {
943 public: 946 public:
944 RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 947 RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
945 JSRegExp::Flags flags, bool is_one_byte); 948 bool ignore_case, bool is_one_byte);
946 949
947 int AllocateRegister() { 950 int AllocateRegister() {
948 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 951 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
949 reg_exp_too_big_ = true; 952 reg_exp_too_big_ = true;
950 return next_register_; 953 return next_register_;
951 } 954 }
952 return next_register_++; 955 return next_register_++;
953 } 956 }
954 957
955 // Lookarounds to match lone surrogates for unicode character class matches
956 // are never nested. We can therefore reuse registers.
957 int UnicodeLookaroundStackRegister() {
958 if (unicode_lookaround_stack_register_ == kNoRegister) {
959 unicode_lookaround_stack_register_ = AllocateRegister();
960 }
961 return unicode_lookaround_stack_register_;
962 }
963
964 int UnicodeLookaroundPositionRegister() {
965 if (unicode_lookaround_position_register_ == kNoRegister) {
966 unicode_lookaround_position_register_ = AllocateRegister();
967 }
968 return unicode_lookaround_position_register_;
969 }
970
971 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, 958 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
972 RegExpNode* start, 959 RegExpNode* start,
973 int capture_count, 960 int capture_count,
974 Handle<String> pattern); 961 Handle<String> pattern);
975 962
976 inline void AddWork(RegExpNode* node) { 963 inline void AddWork(RegExpNode* node) {
977 if (!node->on_work_list() && !node->label()->is_bound()) { 964 if (!node->on_work_list() && !node->label()->is_bound()) {
978 node->set_on_work_list(true); 965 node->set_on_work_list(true);
979 work_list_->Add(node); 966 work_list_->Add(node);
980 } 967 }
981 } 968 }
982 969
983 static const int kImplementationOffset = 0; 970 static const int kImplementationOffset = 0;
984 static const int kNumberOfRegistersOffset = 0; 971 static const int kNumberOfRegistersOffset = 0;
985 static const int kCodeOffset = 1; 972 static const int kCodeOffset = 1;
986 973
987 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 974 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
988 EndNode* accept() { return accept_; } 975 EndNode* accept() { return accept_; }
989 976
990 static const int kMaxRecursion = 100; 977 static const int kMaxRecursion = 100;
991 inline int recursion_depth() { return recursion_depth_; } 978 inline int recursion_depth() { return recursion_depth_; }
992 inline void IncrementRecursionDepth() { recursion_depth_++; } 979 inline void IncrementRecursionDepth() { recursion_depth_++; }
993 inline void DecrementRecursionDepth() { recursion_depth_--; } 980 inline void DecrementRecursionDepth() { recursion_depth_--; }
994 981
995 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 982 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
996 983
997 inline bool ignore_case() { return (flags_ & JSRegExp::kIgnoreCase) != 0; } 984 inline bool ignore_case() { return ignore_case_; }
998 inline bool unicode() { return (flags_ & JSRegExp::kUnicode) != 0; }
999 inline bool one_byte() { return one_byte_; } 985 inline bool one_byte() { return one_byte_; }
1000 inline bool optimize() { return optimize_; } 986 inline bool optimize() { return optimize_; }
1001 inline void set_optimize(bool value) { optimize_ = value; } 987 inline void set_optimize(bool value) { optimize_ = value; }
1002 inline bool limiting_recursion() { return limiting_recursion_; } 988 inline bool limiting_recursion() { return limiting_recursion_; }
1003 inline void set_limiting_recursion(bool value) { 989 inline void set_limiting_recursion(bool value) {
1004 limiting_recursion_ = value; 990 limiting_recursion_ = value;
1005 } 991 }
1006 bool read_backward() { return read_backward_; } 992 bool read_backward() { return read_backward_; }
1007 void set_read_backward(bool value) { read_backward_ = value; } 993 void set_read_backward(bool value) { read_backward_ = value; }
1008 FrequencyCollator* frequency_collator() { return &frequency_collator_; } 994 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
1009 995
1010 int current_expansion_factor() { return current_expansion_factor_; } 996 int current_expansion_factor() { return current_expansion_factor_; }
1011 void set_current_expansion_factor(int value) { 997 void set_current_expansion_factor(int value) {
1012 current_expansion_factor_ = value; 998 current_expansion_factor_ = value;
1013 } 999 }
1014 1000
1015 Isolate* isolate() const { return isolate_; } 1001 Isolate* isolate() const { return isolate_; }
1016 Zone* zone() const { return zone_; } 1002 Zone* zone() const { return zone_; }
1017 1003
1018 static const int kNoRegister = -1; 1004 static const int kNoRegister = -1;
1019 1005
1020 private: 1006 private:
1021 EndNode* accept_; 1007 EndNode* accept_;
1022 int next_register_; 1008 int next_register_;
1023 int unicode_lookaround_stack_register_;
1024 int unicode_lookaround_position_register_;
1025 List<RegExpNode*>* work_list_; 1009 List<RegExpNode*>* work_list_;
1026 int recursion_depth_; 1010 int recursion_depth_;
1027 RegExpMacroAssembler* macro_assembler_; 1011 RegExpMacroAssembler* macro_assembler_;
1028 JSRegExp::Flags flags_; 1012 bool ignore_case_;
1029 bool one_byte_; 1013 bool one_byte_;
1030 bool reg_exp_too_big_; 1014 bool reg_exp_too_big_;
1031 bool limiting_recursion_; 1015 bool limiting_recursion_;
1032 bool optimize_; 1016 bool optimize_;
1033 bool read_backward_; 1017 bool read_backward_;
1034 int current_expansion_factor_; 1018 int current_expansion_factor_;
1035 FrequencyCollator frequency_collator_; 1019 FrequencyCollator frequency_collator_;
1036 Isolate* isolate_; 1020 Isolate* isolate_;
1037 Zone* zone_; 1021 Zone* zone_;
1038 }; 1022 };
(...skipping 11 matching lines...) Expand all
1050 1034
1051 1035
1052 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) { 1036 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
1053 return RegExpEngine::CompilationResult(isolate, "RegExp too big"); 1037 return RegExpEngine::CompilationResult(isolate, "RegExp too big");
1054 } 1038 }
1055 1039
1056 1040
1057 // Attempts to compile the regexp using an Irregexp code generator. Returns 1041 // Attempts to compile the regexp using an Irregexp code generator. Returns
1058 // a fixed array or a null handle depending on whether it succeeded. 1042 // a fixed array or a null handle depending on whether it succeeded.
1059 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 1043 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
1060 JSRegExp::Flags flags, bool one_byte) 1044 bool ignore_case, bool one_byte)
1061 : next_register_(2 * (capture_count + 1)), 1045 : next_register_(2 * (capture_count + 1)),
1062 unicode_lookaround_stack_register_(kNoRegister),
1063 unicode_lookaround_position_register_(kNoRegister),
1064 work_list_(NULL), 1046 work_list_(NULL),
1065 recursion_depth_(0), 1047 recursion_depth_(0),
1066 flags_(flags), 1048 ignore_case_(ignore_case),
1067 one_byte_(one_byte), 1049 one_byte_(one_byte),
1068 reg_exp_too_big_(false), 1050 reg_exp_too_big_(false),
1069 limiting_recursion_(false), 1051 limiting_recursion_(false),
1070 optimize_(FLAG_regexp_optimization), 1052 optimize_(FLAG_regexp_optimization),
1071 read_backward_(false), 1053 read_backward_(false),
1072 current_expansion_factor_(1), 1054 current_expansion_factor_(1),
1073 frequency_collator_(), 1055 frequency_collator_(),
1074 isolate_(isolate), 1056 isolate_(isolate),
1075 zone_(zone) { 1057 zone_(zone) {
1076 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); 1058 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
(...skipping 1032 matching lines...) Expand 10 before | Expand all | Expand 10 after
2109 flip ? even_label : odd_label); 2091 flip ? even_label : odd_label);
2110 } 2092 }
2111 } 2093 }
2112 2094
2113 2095
2114 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 2096 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2115 RegExpCharacterClass* cc, bool one_byte, 2097 RegExpCharacterClass* cc, bool one_byte,
2116 Label* on_failure, int cp_offset, bool check_offset, 2098 Label* on_failure, int cp_offset, bool check_offset,
2117 bool preloaded, Zone* zone) { 2099 bool preloaded, Zone* zone) {
2118 ZoneList<CharacterRange>* ranges = cc->ranges(zone); 2100 ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2119 CharacterRange::Canonicalize(ranges); 2101 if (!CharacterRange::IsCanonical(ranges)) {
2102 CharacterRange::Canonicalize(ranges);
2103 }
2120 2104
2121 int max_char; 2105 int max_char;
2122 if (one_byte) { 2106 if (one_byte) {
2123 max_char = String::kMaxOneByteCharCode; 2107 max_char = String::kMaxOneByteCharCode;
2124 } else { 2108 } else {
2125 max_char = String::kMaxUtf16CodeUnit; 2109 max_char = String::kMaxUtf16CodeUnit;
2126 } 2110 }
2127 2111
2128 int range_count = ranges->length(); 2112 int range_count = ranges->length();
2129 2113
(...skipping 21 matching lines...) Expand all
2151 if (cc->is_negated()) { 2135 if (cc->is_negated()) {
2152 macro_assembler->GoTo(on_failure); 2136 macro_assembler->GoTo(on_failure);
2153 } else { 2137 } else {
2154 // This is a common case hit by non-anchored expressions. 2138 // This is a common case hit by non-anchored expressions.
2155 if (check_offset) { 2139 if (check_offset) {
2156 macro_assembler->CheckPosition(cp_offset, on_failure); 2140 macro_assembler->CheckPosition(cp_offset, on_failure);
2157 } 2141 }
2158 } 2142 }
2159 return; 2143 return;
2160 } 2144 }
2145 if (last_valid_range == 0 &&
2146 !cc->is_negated() &&
2147 ranges->at(0).IsEverything(max_char)) {
2148 // This is a common case hit by non-anchored expressions.
2149 if (check_offset) {
2150 macro_assembler->CheckPosition(cp_offset, on_failure);
2151 }
2152 return;
2153 }
2161 2154
2162 if (!preloaded) { 2155 if (!preloaded) {
2163 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 2156 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
2164 } 2157 }
2165 2158
2166 if (cc->is_standard(zone) && 2159 if (cc->is_standard(zone) &&
2167 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), 2160 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
2168 on_failure)) { 2161 on_failure)) {
2169 return; 2162 return;
2170 } 2163 }
2171 2164
2172 2165
2173 // A new list with ascending entries. Each entry is a code unit 2166 // A new list with ascending entries. Each entry is a code unit
2174 // where there is a boundary between code units that are part of 2167 // where there is a boundary between code units that are part of
2175 // the class and code units that are not. Normally we insert an 2168 // the class and code units that are not. Normally we insert an
2176 // entry at zero which goes to the failure label, but if there 2169 // entry at zero which goes to the failure label, but if there
2177 // was already one there we fall through for success on that entry. 2170 // was already one there we fall through for success on that entry.
2178 // Subsequent entries have alternating meaning (success/failure). 2171 // Subsequent entries have alternating meaning (success/failure).
(...skipping 619 matching lines...) Expand 10 before | Expand all | Expand 10 after
2798 // Character is outside Latin-1 completely 2791 // Character is outside Latin-1 completely
2799 if (converted == 0) return set_replacement(NULL); 2792 if (converted == 0) return set_replacement(NULL);
2800 // Convert quark to Latin-1 in place. 2793 // Convert quark to Latin-1 in place.
2801 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); 2794 uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2802 copy[j] = converted; 2795 copy[j] = converted;
2803 } 2796 }
2804 } else { 2797 } else {
2805 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); 2798 DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
2806 RegExpCharacterClass* cc = elm.char_class(); 2799 RegExpCharacterClass* cc = elm.char_class();
2807 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 2800 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2808 CharacterRange::Canonicalize(ranges); 2801 if (!CharacterRange::IsCanonical(ranges)) {
2802 CharacterRange::Canonicalize(ranges);
2803 }
2809 // Now they are in order so we only need to look at the first. 2804 // Now they are in order so we only need to look at the first.
2810 int range_count = ranges->length(); 2805 int range_count = ranges->length();
2811 if (cc->is_negated()) { 2806 if (cc->is_negated()) {
2812 if (range_count != 0 && 2807 if (range_count != 0 &&
2813 ranges->at(0).from() == 0 && 2808 ranges->at(0).from() == 0 &&
2814 ranges->at(0).to() >= String::kMaxOneByteCharCode) { 2809 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2815 // This will be handled in a later filter. 2810 // This will be handled in a later filter.
2816 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2811 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2817 return set_replacement(NULL); 2812 return set_replacement(NULL);
2818 } 2813 }
(...skipping 468 matching lines...) Expand 10 before | Expand all | Expand 10 after
3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) { 3282 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); 3283 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3289 if (ignore_case) { 3284 if (ignore_case) {
3290 return pass == SIMPLE_CHARACTER_MATCH; 3285 return pass == SIMPLE_CHARACTER_MATCH;
3291 } else { 3286 } else {
3292 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; 3287 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3293 } 3288 }
3294 } 3289 }
3295 3290
3296 3291
3297 TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
3298 ZoneList<CharacterRange>* ranges,
3299 bool read_backward,
3300 RegExpNode* on_success) {
3301 DCHECK_NOT_NULL(ranges);
3302 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
3303 elms->Add(
3304 TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)),
3305 zone);
3306 return new (zone) TextNode(elms, read_backward, on_success);
3307 }
3308
3309
3310 TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
3311 CharacterRange trail,
3312 bool read_backward,
3313 RegExpNode* on_success) {
3314 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
3315 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
3316 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
3317 elms->Add(TextElement::CharClass(
3318 new (zone) RegExpCharacterClass(lead_ranges, false)),
3319 zone);
3320 elms->Add(TextElement::CharClass(
3321 new (zone) RegExpCharacterClass(trail_ranges, false)),
3322 zone);
3323 return new (zone) TextNode(elms, read_backward, on_success);
3324 }
3325
3326
3327 // This generates the code to match a text node. A text node can contain 3292 // This generates the code to match a text node. A text node can contain
3328 // straight character sequences (possibly to be matched in a case-independent 3293 // straight character sequences (possibly to be matched in a case-independent
3329 // way) and character classes. For efficiency we do not do this in a single 3294 // way) and character classes. For efficiency we do not do this in a single
3330 // pass from left to right. Instead we pass over the text node several times, 3295 // pass from left to right. Instead we pass over the text node several times,
3331 // emitting code for some character positions every time. See the comment on 3296 // emitting code for some character positions every time. See the comment on
3332 // TextEmitPass for details. 3297 // TextEmitPass for details.
3333 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3298 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3334 LimitResult limit_result = LimitVersions(compiler, trace); 3299 LimitResult limit_result = LimitVersions(compiler, trace);
3335 if (limit_result == DONE) return; 3300 if (limit_result == DONE) return;
3336 DCHECK(limit_result == CONTINUE); 3301 DCHECK(limit_result == CONTINUE);
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
3433 3398
3434 3399
3435 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 3400 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
3436 RegExpCompiler* compiler) { 3401 RegExpCompiler* compiler) {
3437 if (read_backward()) return NULL; 3402 if (read_backward()) return NULL;
3438 if (elements()->length() != 1) return NULL; 3403 if (elements()->length() != 1) return NULL;
3439 TextElement elm = elements()->at(0); 3404 TextElement elm = elements()->at(0);
3440 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 3405 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3441 RegExpCharacterClass* node = elm.char_class(); 3406 RegExpCharacterClass* node = elm.char_class();
3442 ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3407 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3443 CharacterRange::Canonicalize(ranges); 3408 if (!CharacterRange::IsCanonical(ranges)) {
3409 CharacterRange::Canonicalize(ranges);
3410 }
3444 if (node->is_negated()) { 3411 if (node->is_negated()) {
3445 return ranges->length() == 0 ? on_success() : NULL; 3412 return ranges->length() == 0 ? on_success() : NULL;
3446 } 3413 }
3447 if (ranges->length() != 1) return NULL; 3414 if (ranges->length() != 1) return NULL;
3448 uint32_t max_char; 3415 uint32_t max_char;
3449 if (compiler->one_byte()) { 3416 if (compiler->one_byte()) {
3450 max_char = String::kMaxOneByteCharCode; 3417 max_char = String::kMaxOneByteCharCode;
3451 } else { 3418 } else {
3452 max_char = String::kMaxUtf16CodeUnit; 3419 max_char = String::kMaxUtf16CodeUnit;
3453 } 3420 }
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
3580 return alt_gens_[i]; 3547 return alt_gens_[i];
3581 } 3548 }
3582 3549
3583 private: 3550 private:
3584 static const int kAFew = 10; 3551 static const int kAFew = 10;
3585 ZoneList<AlternativeGeneration*> alt_gens_; 3552 ZoneList<AlternativeGeneration*> alt_gens_;
3586 AlternativeGeneration a_few_alt_gens_[kAFew]; 3553 AlternativeGeneration a_few_alt_gens_[kAFew];
3587 }; 3554 };
3588 3555
3589 3556
3590 static const uc32 kLeadSurrogateStart = 0xd800;
3591 static const uc32 kLeadSurrogateEnd = 0xdbff;
3592 static const uc32 kTrailSurrogateStart = 0xdc00;
3593 static const uc32 kTrailSurrogateEnd = 0xdfff;
3594 static const uc32 kNonBmpStart = 0x10000;
3595 static const uc32 kNonBmpEnd = 0x10ffff;
3596 static const uc32 kRangeEndMarker = 0x110000;
3597
3598 // The '2' variant is has inclusive from and exclusive to. 3557 // The '2' variant is has inclusive from and exclusive to.
3599 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, 3558 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
3600 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. 3559 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.
3601 static const int kSpaceRanges[] = { 3560 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
3602 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, 3561 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
3603 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, 3562 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
3604 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker}; 3563 0xFEFF, 0xFF00, 0x10000 };
3605 static const int kSpaceRangeCount = arraysize(kSpaceRanges); 3564 static const int kSpaceRangeCount = arraysize(kSpaceRanges);
3606 3565
3607 static const int kWordRanges[] = { 3566 static const int kWordRanges[] = {
3608 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, kRangeEndMarker}; 3567 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
3609 static const int kWordRangeCount = arraysize(kWordRanges); 3568 static const int kWordRangeCount = arraysize(kWordRanges);
3610 static const int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker}; 3569 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
3611 static const int kDigitRangeCount = arraysize(kDigitRanges); 3570 static const int kDigitRangeCount = arraysize(kDigitRanges);
3612 static const int kSurrogateRanges[] = { 3571 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
3613 kLeadSurrogateStart, kLeadSurrogateStart + 1, kRangeEndMarker};
3614 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges); 3572 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
3615 static const int kLineTerminatorRanges[] = { 3573 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
3616 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker}; 3574 0x2028, 0x202A, 0x10000 };
3617 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges); 3575 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
3618 3576
3577
3619 void BoyerMoorePositionInfo::Set(int character) { 3578 void BoyerMoorePositionInfo::Set(int character) {
3620 SetInterval(Interval(character, character)); 3579 SetInterval(Interval(character, character));
3621 } 3580 }
3622 3581
3623 3582
3624 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { 3583 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
3625 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); 3584 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
3626 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); 3585 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
3627 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); 3586 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
3628 surrogate_ = 3587 surrogate_ =
(...skipping 1137 matching lines...) Expand 10 before | Expand all | Expand 10 after
4766 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 4725 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
4767 RegExpNode* on_success) { 4726 RegExpNode* on_success) {
4768 return new (compiler->zone()) 4727 return new (compiler->zone())
4769 TextNode(elements(), compiler->read_backward(), on_success); 4728 TextNode(elements(), compiler->read_backward(), on_success);
4770 } 4729 }
4771 4730
4772 4731
4773 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, 4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
4774 const int* special_class, 4733 const int* special_class,
4775 int length) { 4734 int length) {
4776 length--; // Remove final marker. 4735 length--; // Remove final 0x10000.
4777 DCHECK(special_class[length] == kRangeEndMarker); 4736 DCHECK(special_class[length] == 0x10000);
4778 DCHECK(ranges->length() != 0); 4737 DCHECK(ranges->length() != 0);
4779 DCHECK(length != 0); 4738 DCHECK(length != 0);
4780 DCHECK(special_class[0] != 0); 4739 DCHECK(special_class[0] != 0);
4781 if (ranges->length() != (length >> 1) + 1) { 4740 if (ranges->length() != (length >> 1) + 1) {
4782 return false; 4741 return false;
4783 } 4742 }
4784 CharacterRange range = ranges->at(0); 4743 CharacterRange range = ranges->at(0);
4785 if (range.from() != 0) { 4744 if (range.from() != 0) {
4786 return false; 4745 return false;
4787 } 4746 }
4788 for (int i = 0; i < length; i += 2) { 4747 for (int i = 0; i < length; i += 2) {
4789 if (special_class[i] != (range.to() + 1)) { 4748 if (special_class[i] != (range.to() + 1)) {
4790 return false; 4749 return false;
4791 } 4750 }
4792 range = ranges->at((i >> 1) + 1); 4751 range = ranges->at((i >> 1) + 1);
4793 if (special_class[i+1] != range.from()) { 4752 if (special_class[i+1] != range.from()) {
4794 return false; 4753 return false;
4795 } 4754 }
4796 } 4755 }
4797 if (range.to() != 0xffff) { 4756 if (range.to() != 0xffff) {
4798 return false; 4757 return false;
4799 } 4758 }
4800 return true; 4759 return true;
4801 } 4760 }
4802 4761
4803 4762
4804 static bool CompareRanges(ZoneList<CharacterRange>* ranges, 4763 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4805 const int* special_class, 4764 const int* special_class,
4806 int length) { 4765 int length) {
4807 length--; // Remove final marker. 4766 length--; // Remove final 0x10000.
4808 DCHECK(special_class[length] == kRangeEndMarker); 4767 DCHECK(special_class[length] == 0x10000);
4809 if (ranges->length() * 2 != length) { 4768 if (ranges->length() * 2 != length) {
4810 return false; 4769 return false;
4811 } 4770 }
4812 for (int i = 0; i < length; i += 2) { 4771 for (int i = 0; i < length; i += 2) {
4813 CharacterRange range = ranges->at(i >> 1); 4772 CharacterRange range = ranges->at(i >> 1);
4814 if (range.from() != special_class[i] || 4773 if (range.from() != special_class[i] ||
4815 range.to() != special_class[i + 1] - 1) { 4774 range.to() != special_class[i + 1] - 1) {
4816 return false; 4775 return false;
4817 } 4776 }
4818 } 4777 }
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
4854 return true; 4813 return true;
4855 } 4814 }
4856 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { 4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4857 set_.set_standard_set_type('W'); 4816 set_.set_standard_set_type('W');
4858 return true; 4817 return true;
4859 } 4818 }
4860 return false; 4819 return false;
4861 } 4820 }
4862 4821
4863 4822
4864 bool RegExpCharacterClass::NeedsDesugaringForUnicode(Zone* zone) {
4865 ZoneList<CharacterRange>* ranges = this->ranges(zone);
4866 CharacterRange::Canonicalize(ranges);
4867 for (int i = ranges->length() - 1; i >= 0; i--) {
4868 uc32 from = ranges->at(i).from();
4869 uc32 to = ranges->at(i).to();
4870 // Check for non-BMP characters.
4871 if (to >= kNonBmpStart) return true;
4872 // Check for lone surrogates.
4873 if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
4874 }
4875 return false;
4876 }
4877
4878
4879 UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
4880 ZoneList<CharacterRange>* base)
4881 : zone_(zone),
4882 table_(zone),
4883 bmp_(nullptr),
4884 lead_surrogates_(nullptr),
4885 trail_surrogates_(nullptr),
4886 non_bmp_(nullptr) {
4887 // The unicode range splitter categorizes given character ranges into:
4888 // - Code points from the BMP representable by one code unit.
4889 // - Code points outside the BMP that need to be split into surrogate pairs.
4890 // - Lone lead surrogates.
4891 // - Lone trail surrogates.
4892 // Lone surrogates are valid code points, even though they are not actual characters.
4893 // They require special matching to make sure we do not split surrogate pairs.
4894 // We use the dispatch table to accomplish this. The base range is split up
4895 // by the table by the overlay ranges, and the Call callback is used to
4896 // filter and collect ranges for each category.
4897 for (int i = 0; i < base->length(); i++) {
4898 table_.AddRange(base->at(i), kBase, zone_);
4899 }
4900 // Add overlay ranges.
4901 table_.AddRange(CharacterRange(0, kLeadSurrogateStart - 1), kBmpCodePoints,
4902 zone_);
4903 table_.AddRange(CharacterRange(kLeadSurrogateStart, kLeadSurrogateEnd),
4904 kLeadSurrogates, zone_);
4905 table_.AddRange(CharacterRange(kTrailSurrogateStart, kTrailSurrogateEnd),
4906 kTrailSurrogates, zone_);
4907 table_.AddRange(CharacterRange(kTrailSurrogateEnd, kNonBmpStart - 1),
4908 kBmpCodePoints, zone_);
4909 table_.AddRange(CharacterRange(kNonBmpStart, kNonBmpEnd), kNonBmpCodePoints,
4910 zone_);
4911 table_.ForEach(this);
4912 }
4913
4914
4915 void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
4916 OutSet* outset = entry.out_set();
4917 if (!outset->Get(kBase)) return;
4918 ZoneList<CharacterRange>** target = NULL;
4919 if (outset->Get(kBmpCodePoints)) {
4920 target = &bmp_;
4921 } else if (outset->Get(kLeadSurrogates)) {
4922 target = &lead_surrogates_;
4923 } else if (outset->Get(kTrailSurrogates)) {
4924 target = &trail_surrogates_;
4925 } else {
4926 DCHECK(outset->Get(kNonBmpCodePoints));
4927 target = &non_bmp_;
4928 }
4929 if (*target == NULL) *target = new (zone_) ZoneList<CharacterRange>(2, zone_);
4930 (*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);
4931 }
4932
4933
4934 void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
4935 RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
4936 ZoneList<CharacterRange>* bmp = splitter->bmp();
4937 if (bmp == nullptr) return;
4938 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
4939 compiler->zone(), bmp, compiler->read_backward(), on_success)));
4940 }
4941
4942
4943 void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
4944 RegExpNode* on_success,
4945 UnicodeRangeSplitter* splitter) {
4946 ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
4947 if (non_bmp == nullptr) return;
4948 DCHECK(compiler->unicode());
4949 DCHECK(!compiler->one_byte());
4950 Zone* zone = compiler->zone();
4951 CharacterRange::Canonicalize(non_bmp);
4952 for (int i = 0; i < non_bmp->length(); i++) {
4953 // Match surrogate pair.
4954 // E.g. [\u{10005}-\u{11005}] becomes
4955 // \ud800[\udc05-\udfff]|
4956 // [\ud801-\ud803][\udc00-\udfff]|
4957 // \ud804[\udc00-\udc05]
4958 uc32 from = non_bmp->at(i).from();
4959 uc32 to = non_bmp->at(i).to();
4960 uc16 from_l = unibrow::Utf16::LeadSurrogate(from);
4961 uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
4962 uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
4963 uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
4964 if (from_l == to_l) {
4965 // The lead surrogate is the same.
4966 result->AddAlternative(
4967 GuardedAlternative(TextNode::CreateForSurrogatePair(
4968 zone, CharacterRange::Singleton(from_l),
4969 CharacterRange::Range(from_t, to_t), compiler->read_backward(),
4970 on_success)));
4971 } else {
4972 if (from_t != kTrailSurrogateStart) {
4973 // Add [from_l][from_t-\udfff]
4974 result->AddAlternative(
4975 GuardedAlternative(TextNode::CreateForSurrogatePair(
4976 zone, CharacterRange::Singleton(from_l),
4977 CharacterRange::Range(from_t, kTrailSurrogateEnd),
4978 compiler->read_backward(), on_success)));
4979 from_l++;
4980 }
4981 if (to_t != kTrailSurrogateEnd) {
4982 // Add [to_l][\udc00-to_t]
4983 result->AddAlternative(
4984 GuardedAlternative(TextNode::CreateForSurrogatePair(
4985 zone, CharacterRange::Singleton(to_l),
4986 CharacterRange::Range(kTrailSurrogateStart, to_t),
4987 compiler->read_backward(), on_success)));
4988 to_l--;
4989 }
4990 if (from_l <= to_l) {
4991 // Add [from_l-to_l][\udc00-\udfff]
4992 result->AddAlternative(
4993 GuardedAlternative(TextNode::CreateForSurrogatePair(
4994 zone, CharacterRange::Range(from_l, to_l),
4995 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
4996 compiler->read_backward(), on_success)));
4997 }
4998 }
4999 }
5000 }
5001
5002
5003 RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
5004 RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
5005 ZoneList<CharacterRange>* match, RegExpNode* on_success,
5006 bool read_backward) {
5007 Zone* zone = compiler->zone();
5008 RegExpNode* match_node = TextNode::CreateForCharacterRanges(
5009 zone, match, read_backward, on_success);
5010 int stack_register = compiler->UnicodeLookaroundStackRegister();
5011 int position_register = compiler->UnicodeLookaroundPositionRegister();
5012 RegExpLookaround::Builder lookaround(false, match_node, stack_register,
5013 position_register);
5014 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
5015 zone, lookbehind, !read_backward, lookaround.on_match_success());
5016 return lookaround.ForMatch(negative_match);
5017 }
5018
5019
5020 RegExpNode* MatchAndNegativeLookaroundInReadDirection(
5021 RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
5022 ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
5023 bool read_backward) {
5024 Zone* zone = compiler->zone();
5025 int stack_register = compiler->UnicodeLookaroundStackRegister();
5026 int position_register = compiler->UnicodeLookaroundPositionRegister();
5027 RegExpLookaround::Builder lookaround(false, on_success, stack_register,
5028 position_register);
5029 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
5030 zone, lookahead, read_backward, lookaround.on_match_success());
5031 return TextNode::CreateForCharacterRanges(
5032 zone, match, read_backward, lookaround.ForMatch(negative_match));
5033 }
5034
5035
5036 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
5037 RegExpNode* on_success,
5038 UnicodeRangeSplitter* splitter) {
5039 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();
5040 if (lead_surrogates == nullptr) return;
5041 Zone* zone = compiler->zone();
5042 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
5043 ZoneList<CharacterRange>* trail_surrogates =
5044 new (zone) ZoneList<CharacterRange>(1, zone);
5045 trail_surrogates->Add(
5046 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone);
5047
5048 RegExpNode* match =
5049 compiler->read_backward()
5050 // Reading backward. Assert that reading forward, there is no trail
5051 // surrogate, and then backward match the lead surrogate.
5052 ? NegativeLookaroundAgainstReadDirectionAndMatch(
5053 compiler, trail_surrogates, lead_surrogates, on_success, true)
5054 // Reading forward. Forward match the lead surrogate and assert that
5055 // no
5056 // trail surrogate follows.
5057 : MatchAndNegativeLookaroundInReadDirection(
5058 compiler, lead_surrogates, trail_surrogates, on_success, false);
5059 result->AddAlternative(GuardedAlternative(match));
5060 }
5061
5062
5063 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
5064 RegExpNode* on_success,
5065 UnicodeRangeSplitter* splitter) {
5066 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();
5067 if (trail_surrogates == nullptr) return;
5068 Zone* zone = compiler->zone();
5069 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
5070 ZoneList<CharacterRange>* lead_surrogates =
5071 new (zone) ZoneList<CharacterRange>(1, zone);
5072 lead_surrogates->Add(
5073 CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone);
5074
5075 RegExpNode* match =
5076 compiler->read_backward()
5077 // Reading backward. Backward match the trail surrogate and assert
5078 // that no lead surrogate precedes it.
5079 ? MatchAndNegativeLookaroundInReadDirection(
5080 compiler, trail_surrogates, lead_surrogates, on_success, true)
5081 // Reading forward. Assert that reading backward, there is no lead
5082 // surrogate, and then forward match the trail surrogate.
5083 : NegativeLookaroundAgainstReadDirectionAndMatch(
5084 compiler, lead_surrogates, trail_surrogates, on_success, false);
5085 result->AddAlternative(GuardedAlternative(match));
5086 }
5087
5088
5089 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
5090 RegExpNode* on_success) { 4824 RegExpNode* on_success) {
5091 set_.Canonicalize(); 4825 return new (compiler->zone())
5092 Zone* zone = compiler->zone(); 4826 TextNode(this, compiler->read_backward(), on_success);
5093 ZoneList<CharacterRange>* ranges = this->ranges(zone);
5094 if (compiler->unicode() && !compiler->one_byte()) {
5095 if (is_negated()) {
5096 ZoneList<CharacterRange>* negated =
5097 new (zone) ZoneList<CharacterRange>(2, zone);
5098 CharacterRange::Negate(ranges, negated, zone);
5099 ranges = negated;
5100 }
5101 if (ranges->length() == 0) {
5102 // No matches possible.
5103 return new (zone) EndNode(EndNode::BACKTRACK, zone);
5104 }
5105 UnicodeRangeSplitter splitter(zone, ranges);
5106 ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone());
5107 AddBmpCharacters(compiler, result, on_success, &splitter);
5108 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
5109 AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
5110 AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
5111 return result;
5112 } else {
5113 return new (zone) TextNode(this, compiler->read_backward(), on_success);
5114 }
5115 } 4827 }
5116 4828
5117 4829
5118 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { 4830 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
5119 RegExpAtom* atom1 = (*a)->AsAtom(); 4831 RegExpAtom* atom1 = (*a)->AsAtom();
5120 RegExpAtom* atom2 = (*b)->AsAtom(); 4832 RegExpAtom* atom2 = (*b)->AsAtom();
5121 uc16 character1 = atom1->data().at(0); 4833 uc16 character1 = atom1->data().at(0);
5122 uc16 character2 = atom2->data().at(0); 4834 uc16 character2 = atom2->data().at(0);
5123 if (character1 < character2) return -1; 4835 if (character1 < character2) return -1;
5124 if (character1 > character2) return 1; 4836 if (character1 > character2) return 1;
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after
5619 compiler->read_backward(), on_success); 5331 compiler->read_backward(), on_success);
5620 } 5332 }
5621 5333
5622 5334
5623 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 5335 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
5624 RegExpNode* on_success) { 5336 RegExpNode* on_success) {
5625 return on_success; 5337 return on_success;
5626 } 5338 }
5627 5339
5628 5340
5629 RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
5630 int stack_pointer_register,
5631 int position_register,
5632 int capture_register_count,
5633 int capture_register_start)
5634 : is_positive_(is_positive),
5635 on_success_(on_success),
5636 stack_pointer_register_(stack_pointer_register),
5637 position_register_(position_register) {
5638 if (is_positive_) {
5639 on_match_success_ = ActionNode::PositiveSubmatchSuccess(
5640 stack_pointer_register, position_register, capture_register_count,
5641 capture_register_start, on_success_);
5642 } else {
5643 Zone* zone = on_success_->zone();
5644 on_match_success_ = new (zone) NegativeSubmatchSuccess(
5645 stack_pointer_register, position_register, capture_register_count,
5646 capture_register_start, zone);
5647 }
5648 }
5649
5650
5651 RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
5652 if (is_positive_) {
5653 return ActionNode::BeginSubmatch(stack_pointer_register_,
5654 position_register_, match);
5655 } else {
5656 Zone* zone = on_success_->zone();
5657 // We use a ChoiceNode to represent the negative lookaround. The first
5658 // alternative is the negative match. On success, the end node backtracks.
5659 // On failure, the second alternative is tried and leads to success.
5660 // NegativeLookaroundChoiceNode is a special ChoiceNode that ignores the
5661 // first exit when calculating quick checks.
5662 ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
5663 GuardedAlternative(match), GuardedAlternative(on_success_), zone);
5664 return ActionNode::BeginSubmatch(stack_pointer_register_,
5665 position_register_, choice_node);
5666 }
5667 }
5668
5669
5670 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, 5341 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
5671 RegExpNode* on_success) { 5342 RegExpNode* on_success) {
5672 int stack_pointer_register = compiler->AllocateRegister(); 5343 int stack_pointer_register = compiler->AllocateRegister();
5673 int position_register = compiler->AllocateRegister(); 5344 int position_register = compiler->AllocateRegister();
5674 5345
5675 const int registers_per_capture = 2; 5346 const int registers_per_capture = 2;
5676 const int register_of_first_capture = 2; 5347 const int register_of_first_capture = 2;
5677 int register_count = capture_count_ * registers_per_capture; 5348 int register_count = capture_count_ * registers_per_capture;
5678 int register_start = 5349 int register_start =
5679 register_of_first_capture + capture_from_ * registers_per_capture; 5350 register_of_first_capture + capture_from_ * registers_per_capture;
5680 5351
5681 RegExpNode* result; 5352 RegExpNode* result;
5682 bool was_reading_backward = compiler->read_backward(); 5353 bool was_reading_backward = compiler->read_backward();
5683 compiler->set_read_backward(type() == LOOKBEHIND); 5354 compiler->set_read_backward(type() == LOOKBEHIND);
5684 Builder builder(is_positive(), on_success, stack_pointer_register, 5355 if (is_positive()) {
5685 position_register, register_count, register_start); 5356 result = ActionNode::BeginSubmatch(
5686 RegExpNode* match = body_->ToNode(compiler, builder.on_match_success()); 5357 stack_pointer_register, position_register,
5687 result = builder.ForMatch(match); 5358 body()->ToNode(compiler,
5359 ActionNode::PositiveSubmatchSuccess(
5360 stack_pointer_register, position_register,
5361 register_count, register_start, on_success)));
5362 } else {
5363 // We use a ChoiceNode for a negative lookahead because it has most of
5364 // the characteristics we need. It has the body of the lookahead as its
5365 // first alternative and the expression after the lookahead of the second
5366 // alternative. If the first alternative succeeds then the
5367 // NegativeSubmatchSuccess will unwind the stack including everything the
5368 // choice node set up and backtrack. If the first alternative fails then
5369 // the second alternative is tried, which is exactly the desired result
5370 // for a negative lookahead. The NegativeLookaroundChoiceNode is a special
5371 // ChoiceNode that knows to ignore the first exit when calculating quick
5372 // checks.
5373 Zone* zone = compiler->zone();
5374
5375 GuardedAlternative body_alt(
5376 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(
5377 stack_pointer_register, position_register,
5378 register_count, register_start, zone)));
5379 ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
5380 body_alt, GuardedAlternative(on_success), zone);
5381 result = ActionNode::BeginSubmatch(stack_pointer_register,
5382 position_register, choice_node);
5383 }
5688 compiler->set_read_backward(was_reading_backward); 5384 compiler->set_read_backward(was_reading_backward);
5689 return result; 5385 return result;
5690 } 5386 }
5691 5387
5692 5388
5693 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 5389 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
5694 RegExpNode* on_success) { 5390 RegExpNode* on_success) {
5695 return ToNode(body(), index(), compiler, on_success); 5391 return ToNode(body(), index(), compiler, on_success);
5696 } 5392 }
5697 5393
(...skipping 27 matching lines...) Expand all
5725 } 5421 }
5726 return current; 5422 return current;
5727 } 5423 }
5728 5424
5729 5425
5730 static void AddClass(const int* elmv, 5426 static void AddClass(const int* elmv,
5731 int elmc, 5427 int elmc,
5732 ZoneList<CharacterRange>* ranges, 5428 ZoneList<CharacterRange>* ranges,
5733 Zone* zone) { 5429 Zone* zone) {
5734 elmc--; 5430 elmc--;
5735 DCHECK(elmv[elmc] == kRangeEndMarker); 5431 DCHECK(elmv[elmc] == 0x10000);
5736 for (int i = 0; i < elmc; i += 2) { 5432 for (int i = 0; i < elmc; i += 2) {
5737 DCHECK(elmv[i] < elmv[i + 1]); 5433 DCHECK(elmv[i] < elmv[i + 1]);
5738 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone); 5434 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);
5739 } 5435 }
5740 } 5436 }
5741 5437
5742 5438
5743 static void AddClassNegated(const int *elmv, 5439 static void AddClassNegated(const int *elmv,
5744 int elmc, 5440 int elmc,
5745 ZoneList<CharacterRange>* ranges, 5441 ZoneList<CharacterRange>* ranges,
5746 Zone* zone) { 5442 Zone* zone) {
5747 elmc--; 5443 elmc--;
5748 DCHECK(elmv[elmc] == kRangeEndMarker); 5444 DCHECK(elmv[elmc] == 0x10000);
5749 DCHECK(elmv[0] != 0x0000); 5445 DCHECK(elmv[0] != 0x0000);
5750 DCHECK(elmv[elmc - 1] != String::kMaxCodePoint); 5446 DCHECK(elmv[elmc-1] != String::kMaxUtf16CodeUnit);
5751 uc16 last = 0x0000; 5447 uc16 last = 0x0000;
5752 for (int i = 0; i < elmc; i += 2) { 5448 for (int i = 0; i < elmc; i += 2) {
5753 DCHECK(last <= elmv[i] - 1); 5449 DCHECK(last <= elmv[i] - 1);
5754 DCHECK(elmv[i] < elmv[i + 1]); 5450 DCHECK(elmv[i] < elmv[i + 1]);
5755 ranges->Add(CharacterRange(last, elmv[i] - 1), zone); 5451 ranges->Add(CharacterRange(last, elmv[i] - 1), zone);
5756 last = elmv[i + 1]; 5452 last = elmv[i + 1];
5757 } 5453 }
5758 ranges->Add(CharacterRange(last, String::kMaxCodePoint), zone); 5454 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone);
5759 } 5455 }
5760 5456
5761 5457
5762 void CharacterRange::AddClassEscape(uc16 type, 5458 void CharacterRange::AddClassEscape(uc16 type,
5763 ZoneList<CharacterRange>* ranges, 5459 ZoneList<CharacterRange>* ranges,
5764 Zone* zone) { 5460 Zone* zone) {
5765 switch (type) { 5461 switch (type) {
5766 case 's': 5462 case 's':
5767 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone); 5463 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5768 break; 5464 break;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5805 UNREACHABLE(); 5501 UNREACHABLE();
5806 } 5502 }
5807 } 5503 }
5808 5504
5809 5505
5810 Vector<const int> CharacterRange::GetWordBounds() { 5506 Vector<const int> CharacterRange::GetWordBounds() {
5811 return Vector<const int>(kWordRanges, kWordRangeCount - 1); 5507 return Vector<const int>(kWordRanges, kWordRangeCount - 1);
5812 } 5508 }
5813 5509
5814 5510
5511 class CharacterRangeSplitter {
5512 public:
5513 CharacterRangeSplitter(ZoneList<CharacterRange>** included,
5514 ZoneList<CharacterRange>** excluded,
5515 Zone* zone)
5516 : included_(included),
5517 excluded_(excluded),
5518 zone_(zone) { }
5519 void Call(uc16 from, DispatchTable::Entry entry);
5520
5521 static const int kInBase = 0;
5522 static const int kInOverlay = 1;
5523
5524 private:
5525 ZoneList<CharacterRange>** included_;
5526 ZoneList<CharacterRange>** excluded_;
5527 Zone* zone_;
5528 };
5529
5530
5531 void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
5532 if (!entry.out_set()->Get(kInBase)) return;
5533 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
5534 ? included_
5535 : excluded_;
5536 if (*target == NULL) *target = new(zone_) ZoneList<CharacterRange>(2, zone_);
5537 (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);
5538 }
5539
5540
5541 void CharacterRange::Split(ZoneList<CharacterRange>* base,
5542 Vector<const int> overlay,
5543 ZoneList<CharacterRange>** included,
5544 ZoneList<CharacterRange>** excluded,
5545 Zone* zone) {
5546 DCHECK_NULL(*included);
5547 DCHECK_NULL(*excluded);
5548 DispatchTable table(zone);
5549 for (int i = 0; i < base->length(); i++)
5550 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);
5551 for (int i = 0; i < overlay.length(); i += 2) {
5552 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
5553 CharacterRangeSplitter::kInOverlay, zone);
5554 }
5555 CharacterRangeSplitter callback(included, excluded, zone);
5556 table.ForEach(&callback);
5557 }
5558
5559
5815 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone, 5560 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
5816 ZoneList<CharacterRange>* ranges, 5561 ZoneList<CharacterRange>* ranges,
5817 bool is_one_byte) { 5562 bool is_one_byte) {
5818 uc32 bottom = from(); 5563 uc16 bottom = from();
5819 uc32 top = to(); 5564 uc16 top = to();
5820 // Nothing to be done for surrogates.
5821 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
5822 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { 5565 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
5823 if (bottom > String::kMaxOneByteCharCode) return; 5566 if (bottom > String::kMaxOneByteCharCode) return;
5824 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; 5567 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5825 } 5568 }
5826 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5569 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5827 if (top == bottom) { 5570 if (top == bottom) {
5828 // If this is a singleton we just expand the one character. 5571 // If this is a singleton we just expand the one character.
5829 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); 5572 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5830 for (int i = 0; i < length; i++) { 5573 for (int i = 0; i < length; i++) {
5831 uc32 chr = chars[i]; 5574 uc32 chr = chars[i];
(...skipping 17 matching lines...) Expand all
5849 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only 5592 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
5850 // add a range if it is not already contained in the input, so [c-f] 5593 // add a range if it is not already contained in the input, so [c-f]
5851 // will be skipped but [C-F] will be added. If this range is not 5594 // will be skipped but [C-F] will be added. If this range is not
5852 // completely contained in a block we do this for all the blocks 5595 // completely contained in a block we do this for all the blocks
5853 // covered by the range (handling characters that are not in a block 5596 // covered by the range (handling characters that are not in a block
5854 // as a "singleton block"). 5597 // as a "singleton block").
5855 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5598 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5856 int pos = bottom; 5599 int pos = bottom;
5857 while (pos <= top) { 5600 while (pos <= top) {
5858 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range); 5601 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
5859 uc32 block_end; 5602 uc16 block_end;
5860 if (length == 0) { 5603 if (length == 0) {
5861 block_end = pos; 5604 block_end = pos;
5862 } else { 5605 } else {
5863 DCHECK_EQ(1, length); 5606 DCHECK_EQ(1, length);
5864 block_end = range[0]; 5607 block_end = range[0];
5865 } 5608 }
5866 int end = (block_end > top) ? top : block_end; 5609 int end = (block_end > top) ? top : block_end;
5867 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range); 5610 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
5868 for (int i = 0; i < length; i++) { 5611 for (int i = 0; i < length; i++) {
5869 uc32 c = range[i]; 5612 uc32 c = range[i];
5870 uc32 range_from = c - (block_end - pos); 5613 uc16 range_from = c - (block_end - pos);
5871 uc32 range_to = c - (block_end - end); 5614 uc16 range_to = c - (block_end - end);
5872 if (!(bottom <= range_from && range_to <= top)) { 5615 if (!(bottom <= range_from && range_to <= top)) {
5873 ranges->Add(CharacterRange(range_from, range_to), zone); 5616 ranges->Add(CharacterRange(range_from, range_to), zone);
5874 } 5617 }
5875 } 5618 }
5876 pos = end + 1; 5619 pos = end + 1;
5877 } 5620 }
5878 } 5621 }
5879 } 5622 }
5880 5623
5881 5624
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
5922 5665
5923 5666
5924 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, 5667 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
5925 int count, 5668 int count,
5926 CharacterRange insert) { 5669 CharacterRange insert) {
5927 // Inserts a range into list[0..count[, which must be sorted 5670 // Inserts a range into list[0..count[, which must be sorted
5928 // by from value and non-overlapping and non-adjacent, using at most 5671 // by from value and non-overlapping and non-adjacent, using at most
5929 // list[0..count] for the result. Returns the number of resulting 5672 // list[0..count] for the result. Returns the number of resulting
5930 // canonicalized ranges. Inserting a range may collapse existing ranges into 5673 // canonicalized ranges. Inserting a range may collapse existing ranges into
5931 // fewer ranges, so the return value can be anything in the range 1..count+1. 5674 // fewer ranges, so the return value can be anything in the range 1..count+1.
5932 uc32 from = insert.from(); 5675 uc16 from = insert.from();
5933 uc32 to = insert.to(); 5676 uc16 to = insert.to();
5934 int start_pos = 0; 5677 int start_pos = 0;
5935 int end_pos = count; 5678 int end_pos = count;
5936 for (int i = count - 1; i >= 0; i--) { 5679 for (int i = count - 1; i >= 0; i--) {
5937 CharacterRange current = list->at(i); 5680 CharacterRange current = list->at(i);
5938 if (current.from() > to + 1) { 5681 if (current.from() > to + 1) {
5939 end_pos = i; 5682 end_pos = i;
5940 } else if (current.to() + 1 < from) { 5683 } else if (current.to() + 1 < from) {
5941 start_pos = i + 1; 5684 start_pos = i + 1;
5942 break; 5685 break;
5943 } 5686 }
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
6023 DCHECK(CharacterRange::IsCanonical(character_ranges)); 5766 DCHECK(CharacterRange::IsCanonical(character_ranges));
6024 } 5767 }
6025 5768
6026 5769
6027 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, 5770 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
6028 ZoneList<CharacterRange>* negated_ranges, 5771 ZoneList<CharacterRange>* negated_ranges,
6029 Zone* zone) { 5772 Zone* zone) {
6030 DCHECK(CharacterRange::IsCanonical(ranges)); 5773 DCHECK(CharacterRange::IsCanonical(ranges));
6031 DCHECK_EQ(0, negated_ranges->length()); 5774 DCHECK_EQ(0, negated_ranges->length());
6032 int range_count = ranges->length(); 5775 int range_count = ranges->length();
6033 uc32 from = 0; 5776 uc16 from = 0;
6034 int i = 0; 5777 int i = 0;
6035 if (range_count > 0 && ranges->at(0).from() == 0) { 5778 if (range_count > 0 && ranges->at(0).from() == 0) {
6036 from = ranges->at(0).to(); 5779 from = ranges->at(0).to();
6037 i = 1; 5780 i = 1;
6038 } 5781 }
6039 while (i < range_count) { 5782 while (i < range_count) {
6040 CharacterRange range = ranges->at(i); 5783 CharacterRange range = ranges->at(i);
6041 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone); 5784 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);
6042 from = range.to(); 5785 from = range.to();
6043 i++; 5786 i++;
6044 } 5787 }
6045 if (from < String::kMaxCodePoint) { 5788 if (from < String::kMaxUtf16CodeUnit) {
6046 negated_ranges->Add(CharacterRange(from + 1, String::kMaxCodePoint), zone); 5789 negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),
5790 zone);
6047 } 5791 }
6048 } 5792 }
6049 5793
6050 5794
6051 // ------------------------------------------------------------------- 5795 // -------------------------------------------------------------------
6052 // Splay tree 5796 // Splay tree
6053 5797
6054 5798
6055 OutSet* OutSet::Extend(unsigned value, Zone* zone) { 5799 OutSet* OutSet::Extend(unsigned value, Zone* zone) {
6056 if (Get(value)) 5800 if (Get(value))
(...skipping 30 matching lines...) Expand all
6087 if (value < kFirstLimit) { 5831 if (value < kFirstLimit) {
6088 return (first_ & (1 << value)) != 0; 5832 return (first_ & (1 << value)) != 0;
6089 } else if (remaining_ == NULL) { 5833 } else if (remaining_ == NULL) {
6090 return false; 5834 return false;
6091 } else { 5835 } else {
6092 return remaining_->Contains(value); 5836 return remaining_->Contains(value);
6093 } 5837 }
6094 } 5838 }
6095 5839
6096 5840
6097 const uc32 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar; 5841 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
6098 5842
6099 5843
6100 void DispatchTable::AddRange(CharacterRange full_range, int value, 5844 void DispatchTable::AddRange(CharacterRange full_range, int value,
6101 Zone* zone) { 5845 Zone* zone) {
6102 CharacterRange current = full_range; 5846 CharacterRange current = full_range;
6103 if (tree()->is_empty()) { 5847 if (tree()->is_empty()) {
6104 // If this is the first range we just insert into the table. 5848 // If this is the first range we just insert into the table.
6105 ZoneSplayTree<Config>::Locator loc; 5849 ZoneSplayTree<Config>::Locator loc;
6106 bool inserted = tree()->Insert(current.from(), &loc); 5850 bool inserted = tree()->Insert(current.from(), &loc);
6107 DCHECK(inserted); 5851 DCHECK(inserted);
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
6189 USE(inserted); 5933 USE(inserted);
6190 ins.set_value(Entry(current.from(), 5934 ins.set_value(Entry(current.from(),
6191 current.to(), 5935 current.to(),
6192 empty()->Extend(value, zone))); 5936 empty()->Extend(value, zone)));
6193 break; 5937 break;
6194 } 5938 }
6195 } 5939 }
6196 } 5940 }
6197 5941
6198 5942
6199 OutSet* DispatchTable::Get(uc32 value) { 5943 OutSet* DispatchTable::Get(uc16 value) {
6200 ZoneSplayTree<Config>::Locator loc; 5944 ZoneSplayTree<Config>::Locator loc;
6201 if (!tree()->FindGreatestLessThan(value, &loc)) 5945 if (!tree()->FindGreatestLessThan(value, &loc))
6202 return empty(); 5946 return empty();
6203 Entry* entry = &loc.value(); 5947 Entry* entry = &loc.value();
6204 if (value <= entry->to()) 5948 if (value <= entry->to())
6205 return entry->out_set(); 5949 return entry->out_set();
6206 else 5950 else
6207 return empty(); 5951 return empty();
6208 } 5952 }
6209 5953
(...skipping 297 matching lines...) Expand 10 before | Expand all | Expand 10 after
6507 } 6251 }
6508 6252
6509 6253
6510 void DispatchTableConstructor::VisitAction(ActionNode* that) { 6254 void DispatchTableConstructor::VisitAction(ActionNode* that) {
6511 RegExpNode* target = that->on_success(); 6255 RegExpNode* target = that->on_success();
6512 target->Accept(this); 6256 target->Accept(this);
6513 } 6257 }
6514 6258
6515 6259
6516 RegExpEngine::CompilationResult RegExpEngine::Compile( 6260 RegExpEngine::CompilationResult RegExpEngine::Compile(
6517 Isolate* isolate, Zone* zone, RegExpCompileData* data, 6261 Isolate* isolate, Zone* zone, RegExpCompileData* data, bool ignore_case,
6518 JSRegExp::Flags flags, Handle<String> pattern, 6262 bool is_global, bool is_multiline, bool is_sticky, Handle<String> pattern,
6519 Handle<String> sample_subject, bool is_one_byte) { 6263 Handle<String> sample_subject, bool is_one_byte) {
6520 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 6264 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6521 return IrregexpRegExpTooBig(isolate); 6265 return IrregexpRegExpTooBig(isolate);
6522 } 6266 }
6523 bool ignore_case = flags & JSRegExp::kIgnoreCase; 6267 RegExpCompiler compiler(isolate, zone, data->capture_count, ignore_case,
6524 bool is_sticky = flags & JSRegExp::kSticky;
6525 bool is_global = flags & JSRegExp::kGlobal;
6526 RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
6527 is_one_byte); 6268 is_one_byte);
6528 6269
6529 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); 6270 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
6530 6271
6531 // Sample some characters from the middle of the string. 6272 // Sample some characters from the middle of the string.
6532 static const int kSampleSize = 128; 6273 static const int kSampleSize = 128;
6533 6274
6534 sample_subject = String::Flatten(sample_subject); 6275 sample_subject = String::Flatten(sample_subject);
6535 int chars_sampled = 0; 6276 int chars_sampled = 0;
6536 int half_way = (sample_subject->length() - kSampleSize) / 2; 6277 int half_way = (sample_subject->length() - kSampleSize) / 2;
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after
6759 6500
6760 6501
6761 void RegExpResultsCache::Clear(FixedArray* cache) { 6502 void RegExpResultsCache::Clear(FixedArray* cache) {
6762 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6503 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6763 cache->set(i, Smi::FromInt(0)); 6504 cache->set(i, Smi::FromInt(0));
6764 } 6505 }
6765 } 6506 }
6766 6507
6767 } // namespace internal 6508 } // namespace internal
6768 } // namespace v8 6509 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698