src/regexp/jsregexp.cc - Issue 1578253005: [regexp] implement character classes for unicode regexps.

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1578253005: [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: more tests Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/jsregexp.h"	5 #include "src/regexp/jsregexp.h"

6	6

7 #include "src/ast/ast.h"	7 #include "src/ast/ast.h"

8 #include "src/base/platform/platform.h"	8 #include "src/base/platform/platform.h"

9 #include "src/compilation-cache.h"	9 #include "src/compilation-cache.h"

10 #include "src/compiler.h"	10 #include "src/compiler.h"

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
65 Handle<String> error_text) {	65 Handle<String> error_text) {

66 USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text));	66 USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text));

67 }	67 }

68	68

69	69

70 ContainedInLattice AddRange(ContainedInLattice containment,	70 ContainedInLattice AddRange(ContainedInLattice containment,

71 const int* ranges,	71 const int* ranges,

72 int ranges_length,	72 int ranges_length,

73 Interval new_range) {	73 Interval new_range) {

74 DCHECK((ranges_length & 1) == 1);	74 DCHECK((ranges_length & 1) == 1);

75 DCHECK(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);	75 DCHECK(ranges[ranges_length - 1] == String::kMaxCodePoint + 1);

76 if (containment == kLatticeUnknown) return containment;	76 if (containment == kLatticeUnknown) return containment;

77 bool inside = false;	77 bool inside = false;

78 int last = 0;	78 int last = 0;

79 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {	79 for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {

80 // Consider the range from last to ranges[i].	80 // Consider the range from last to ranges[i].

81 // We haven't got to the new range yet.	81 // We haven't got to the new range yet.

82 if (ranges[i] <= new_range.from()) continue;	82 if (ranges[i] <= new_range.from()) continue;

83 // New range is wholly inside last-ranges[i]. Note that new_range.to() is	83 // New range is wholly inside last-ranges[i]. Note that new_range.to() is

84 // inclusive, but the values in ranges are not.	84 // inclusive, but the values in ranges are not.

85 if (last <= new_range.from() && new_range.to() < ranges[i]) {	85 if (last <= new_range.from() && new_range.to() < ranges[i]) {

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
138	138

139 Handle<Object> result;	139 Handle<Object> result;

140 if (in_cache) {	140 if (in_cache) {

141 re->set_data(*cached);	141 re->set_data(*cached);

142 return re;	142 return re;

143 }	143 }

144 pattern = String::Flatten(pattern);	144 pattern = String::Flatten(pattern);

145 PostponeInterruptsScope postpone(isolate);	145 PostponeInterruptsScope postpone(isolate);

146 RegExpCompileData parse_result;	146 RegExpCompileData parse_result;

147 FlatStringReader reader(isolate, pattern);	147 FlatStringReader reader(isolate, pattern);

148 if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader,	148 if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader, flags,

149 flags & JSRegExp::kMultiline,	149 &parse_result)) {

150 flags & JSRegExp::kUnicode, &parse_result)) {

151 // Throw an exception if we fail to parse the pattern.	150 // Throw an exception if we fail to parse the pattern.

152 return ThrowRegExpException(re, pattern, parse_result.error);	151 return ThrowRegExpException(re, pattern, parse_result.error);

153 }	152 }

154	153

155 bool has_been_compiled = false;	154 bool has_been_compiled = false;

156	155

157 if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&	156 if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&

158 !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {	157 !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {

159 // Parse-tree is a single atom that is equal to the pattern.	158 // Parse-tree is a single atom that is equal to the pattern.

160 AtomCompile(re, pattern, flags, pattern);	159 AtomCompile(re, pattern, flags, pattern);

(...skipping 203 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
364 ThrowRegExpException(re, error_message);	363 ThrowRegExpException(re, error_message);

365 return false;	364 return false;

366 }	365 }

367	366

368 JSRegExp::Flags flags = re->GetFlags();	367 JSRegExp::Flags flags = re->GetFlags();

369	368

370 Handle<String> pattern(re->Pattern());	369 Handle<String> pattern(re->Pattern());

371 pattern = String::Flatten(pattern);	370 pattern = String::Flatten(pattern);

372 RegExpCompileData compile_data;	371 RegExpCompileData compile_data;

373 FlatStringReader reader(isolate, pattern);	372 FlatStringReader reader(isolate, pattern);

374 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader,	373 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,

375 flags & JSRegExp::kMultiline,	374 &compile_data)) {

376 flags & JSRegExp::kUnicode, &compile_data)) {

377 // Throw an exception if we fail to parse the pattern.	375 // Throw an exception if we fail to parse the pattern.

378 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.	376 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.

379 USE(ThrowRegExpException(re, pattern, compile_data.error));	377 USE(ThrowRegExpException(re, pattern, compile_data.error));

380 return false;	378 return false;

381 }	379 }

382 RegExpEngine::CompilationResult result = RegExpEngine::Compile(	380 RegExpEngine::CompilationResult result =

383 isolate, &zone, &compile_data, flags & JSRegExp::kIgnoreCase,	381 RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern,

384 flags & JSRegExp::kGlobal, flags & JSRegExp::kMultiline,	382 sample_subject, is_one_byte);

385 flags & JSRegExp::kSticky, pattern, sample_subject, is_one_byte);

386 if (result.error_message != NULL) {	383 if (result.error_message != NULL) {

387 // Unable to compile regexp.	384 // Unable to compile regexp.

388 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(	385 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(

389 CStrVector(result.error_message)).ToHandleChecked();	386 CStrVector(result.error_message)).ToHandleChecked();

390 ThrowRegExpException(re, error_message);	387 ThrowRegExpException(re, error_message);

391 return false;	388 return false;

392 }	389 }

393	390

394 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));	391 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));

395 data->set(JSRegExp::code_index(is_one_byte), result.code);	392 data->set(JSRegExp::code_index(is_one_byte), result.code);

(...skipping 542 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
938	935

939 private:	936 private:

940 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];	937 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];

941 int total_samples_;	938 int total_samples_;

942 };	939 };

943	940

944	941

945 class RegExpCompiler {	942 class RegExpCompiler {

946 public:	943 public:

947 RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,	944 RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,

948 bool ignore_case, bool is_one_byte);	945 JSRegExp::Flags flags, bool is_one_byte);

949	946

950 int AllocateRegister() {	947 int AllocateRegister() {

951 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {	948 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {

952 reg_exp_too_big_ = true;	949 reg_exp_too_big_ = true;

953 return next_register_;	950 return next_register_;

954 }	951 }

955 return next_register_++;	952 return next_register_++;

956 }	953 }

957	954

	955 // Lookarounds to match lone surrogates for unicode character class matches

	956 // are never nested. We can therefore reuse registers.

	957 int UnicodeLookaroundStackRegister() {

	958 if (unicode_lookaround_stack_register_ == kNoRegister) {

	959 unicode_lookaround_stack_register_ = AllocateRegister();

	960 }

	961 return unicode_lookaround_stack_register_;

	962 }

	963

	964 int UnicodeLookaroundPositionRegister() {

	965 if (unicode_lookaround_position_register_ == kNoRegister) {

	966 unicode_lookaround_position_register_ = AllocateRegister();

	967 }

	968 return unicode_lookaround_position_register_;

	969 }

	970

958 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,	971 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,

959 RegExpNode* start,	972 RegExpNode* start,

960 int capture_count,	973 int capture_count,

961 Handle<String> pattern);	974 Handle<String> pattern);

962	975

963 inline void AddWork(RegExpNode* node) {	976 inline void AddWork(RegExpNode* node) {

964 if (!node->on_work_list() && !node->label()->is_bound()) {	977 if (!node->on_work_list() && !node->label()->is_bound()) {

965 node->set_on_work_list(true);	978 node->set_on_work_list(true);

966 work_list_->Add(node);	979 work_list_->Add(node);

967 }	980 }

968 }	981 }

969	982

970 static const int kImplementationOffset = 0;	983 static const int kImplementationOffset = 0;

971 static const int kNumberOfRegistersOffset = 0;	984 static const int kNumberOfRegistersOffset = 0;

972 static const int kCodeOffset = 1;	985 static const int kCodeOffset = 1;

973	986

974 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }	987 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }

975 EndNode* accept() { return accept_; }	988 EndNode* accept() { return accept_; }

976	989

977 static const int kMaxRecursion = 100;	990 static const int kMaxRecursion = 100;

978 inline int recursion_depth() { return recursion_depth_; }	991 inline int recursion_depth() { return recursion_depth_; }

979 inline void IncrementRecursionDepth() { recursion_depth_++; }	992 inline void IncrementRecursionDepth() { recursion_depth_++; }

980 inline void DecrementRecursionDepth() { recursion_depth_--; }	993 inline void DecrementRecursionDepth() { recursion_depth_--; }

981	994

982 void SetRegExpTooBig() { reg_exp_too_big_ = true; }	995 void SetRegExpTooBig() { reg_exp_too_big_ = true; }

983	996

984 inline bool ignore_case() { return ignore_case_; }	997 inline bool ignore_case() { return (flags_ & JSRegExp::kIgnoreCase) != 0; }

	998 inline bool unicode() { return (flags_ & JSRegExp::kUnicode) != 0; }

985 inline bool one_byte() { return one_byte_; }	999 inline bool one_byte() { return one_byte_; }

986 inline bool optimize() { return optimize_; }	1000 inline bool optimize() { return optimize_; }

987 inline void set_optimize(bool value) { optimize_ = value; }	1001 inline void set_optimize(bool value) { optimize_ = value; }

988 inline bool limiting_recursion() { return limiting_recursion_; }	1002 inline bool limiting_recursion() { return limiting_recursion_; }

989 inline void set_limiting_recursion(bool value) {	1003 inline void set_limiting_recursion(bool value) {

990 limiting_recursion_ = value;	1004 limiting_recursion_ = value;

991 }	1005 }

992 bool read_backward() { return read_backward_; }	1006 bool read_backward() { return read_backward_; }

993 void set_read_backward(bool value) { read_backward_ = value; }	1007 void set_read_backward(bool value) { read_backward_ = value; }

994 FrequencyCollator* frequency_collator() { return &frequency_collator_; }	1008 FrequencyCollator* frequency_collator() { return &frequency_collator_; }

995	1009

996 int current_expansion_factor() { return current_expansion_factor_; }	1010 int current_expansion_factor() { return current_expansion_factor_; }

997 void set_current_expansion_factor(int value) {	1011 void set_current_expansion_factor(int value) {

998 current_expansion_factor_ = value;	1012 current_expansion_factor_ = value;

999 }	1013 }

1000	1014

1001 Isolate* isolate() const { return isolate_; }	1015 Isolate* isolate() const { return isolate_; }

1002 Zone* zone() const { return zone_; }	1016 Zone* zone() const { return zone_; }

1003	1017

1004 static const int kNoRegister = -1;	1018 static const int kNoRegister = -1;

1005	1019

1006 private:	1020 private:

1007 EndNode* accept_;	1021 EndNode* accept_;

1008 int next_register_;	1022 int next_register_;

	1023 int unicode_lookaround_stack_register_;

	1024 int unicode_lookaround_position_register_;

1009 List<RegExpNode> work_list_;	1025 List<RegExpNode> work_list_;

1010 int recursion_depth_;	1026 int recursion_depth_;

1011 RegExpMacroAssembler* macro_assembler_;	1027 RegExpMacroAssembler* macro_assembler_;

1012 bool ignore_case_;	1028 JSRegExp::Flags flags_;

1013 bool one_byte_;	1029 bool one_byte_;

1014 bool reg_exp_too_big_;	1030 bool reg_exp_too_big_;

1015 bool limiting_recursion_;	1031 bool limiting_recursion_;

1016 bool optimize_;	1032 bool optimize_;

1017 bool read_backward_;	1033 bool read_backward_;

1018 int current_expansion_factor_;	1034 int current_expansion_factor_;

1019 FrequencyCollator frequency_collator_;	1035 FrequencyCollator frequency_collator_;

1020 Isolate* isolate_;	1036 Isolate* isolate_;

1021 Zone* zone_;	1037 Zone* zone_;

1022 };	1038 };

(...skipping 11 matching lines...) Expand all Loading...
1034	1050

1035	1051

1036 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {	1052 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {

1037 return RegExpEngine::CompilationResult(isolate, "RegExp too big");	1053 return RegExpEngine::CompilationResult(isolate, "RegExp too big");

1038 }	1054 }

1039	1055

1040	1056

1041 // Attempts to compile the regexp using an Irregexp code generator. Returns	1057 // Attempts to compile the regexp using an Irregexp code generator. Returns

1042 // a fixed array or a null handle depending on whether it succeeded.	1058 // a fixed array or a null handle depending on whether it succeeded.

1043 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,	1059 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,

1044 bool ignore_case, bool one_byte)	1060 JSRegExp::Flags flags, bool one_byte)

1045 : next_register_(2 * (capture_count + 1)),	1061 : next_register_(2 * (capture_count + 1)),

	1062 unicode_lookaround_stack_register_(kNoRegister),

	1063 unicode_lookaround_position_register_(kNoRegister),

1046 work_list_(NULL),	1064 work_list_(NULL),

1047 recursion_depth_(0),	1065 recursion_depth_(0),

1048 ignore_case_(ignore_case),	1066 flags_(flags),

1049 one_byte_(one_byte),	1067 one_byte_(one_byte),

1050 reg_exp_too_big_(false),	1068 reg_exp_too_big_(false),

1051 limiting_recursion_(false),	1069 limiting_recursion_(false),

1052 optimize_(FLAG_regexp_optimization),	1070 optimize_(FLAG_regexp_optimization),

1053 read_backward_(false),	1071 read_backward_(false),

1054 current_expansion_factor_(1),	1072 current_expansion_factor_(1),

1055 frequency_collator_(),	1073 frequency_collator_(),

1056 isolate_(isolate),	1074 isolate_(isolate),

1057 zone_(zone) {	1075 zone_(zone) {

1058 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);	1076 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);

(...skipping 1032 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2091 flip ? even_label : odd_label);	2109 flip ? even_label : odd_label);

2092 }	2110 }

2093 }	2111 }

2094	2112

2095	2113

2096 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,	2114 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

2097 RegExpCharacterClass* cc, bool one_byte,	2115 RegExpCharacterClass* cc, bool one_byte,

2098 Label* on_failure, int cp_offset, bool check_offset,	2116 Label* on_failure, int cp_offset, bool check_offset,

2099 bool preloaded, Zone* zone) {	2117 bool preloaded, Zone* zone) {

2100 ZoneList<CharacterRange>* ranges = cc->ranges(zone);	2118 ZoneList<CharacterRange>* ranges = cc->ranges(zone);

2101 if (!CharacterRange::IsCanonical(ranges)) {	2119 CharacterRange::Canonicalize(ranges);

2102 CharacterRange::Canonicalize(ranges);

2103 }

2104	2120

2105 int max_char;	2121 int max_char;

2106 if (one_byte) {	2122 if (one_byte) {

2107 max_char = String::kMaxOneByteCharCode;	2123 max_char = String::kMaxOneByteCharCode;

2108 } else {	2124 } else {

2109 max_char = String::kMaxUtf16CodeUnit;	2125 max_char = String::kMaxUtf16CodeUnit;

2110 }	2126 }

2111	2127

2112 int range_count = ranges->length();	2128 int range_count = ranges->length();

2113	2129

(...skipping 21 matching lines...) Expand all Loading...
2135 if (cc->is_negated()) {	2151 if (cc->is_negated()) {

2136 macro_assembler->GoTo(on_failure);	2152 macro_assembler->GoTo(on_failure);

2137 } else {	2153 } else {

2138 // This is a common case hit by non-anchored expressions.	2154 // This is a common case hit by non-anchored expressions.

2139 if (check_offset) {	2155 if (check_offset) {

2140 macro_assembler->CheckPosition(cp_offset, on_failure);	2156 macro_assembler->CheckPosition(cp_offset, on_failure);

2141 }	2157 }

2142 }	2158 }

2143 return;	2159 return;

2144 }	2160 }

2145 if (last_valid_range == 0 &&

2146 !cc->is_negated() &&

2147 ranges->at(0).IsEverything(max_char)) {

2148 // This is a common case hit by non-anchored expressions.

2149 if (check_offset) {

2150 macro_assembler->CheckPosition(cp_offset, on_failure);

2151 }

2152 return;

2153 }

2154	2161

2155 if (!preloaded) {	2162 if (!preloaded) {

2156 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);	2163 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);

2157 }	2164 }

2158	2165

2159 if (cc->is_standard(zone) &&	2166 if (cc->is_standard(zone) &&

2160 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),	2167 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

2161 on_failure)) {	2168 on_failure)) {

2162 return;	2169 return;

2163 }	2170 }

2164	2171

2165	2172

2166 // A new list with ascending entries. Each entry is a code unit	2173 // A new list with ascending entries. Each entry is a code unit

2167 // where there is a boundary between code units that are part of	2174 // where there is a boundary between code units that are part of

2168 // the class and code units that are not. Normally we insert an	2175 // the class and code units that are not. Normally we insert an

2169 // entry at zero which goes to the failure label, but if there	2176 // entry at zero which goes to the failure label, but if there

2170 // was already one there we fall through for success on that entry.	2177 // was already one there we fall through for success on that entry.

2171 // Subsequent entries have alternating meaning (success/failure).	2178 // Subsequent entries have alternating meaning (success/failure).

(...skipping 619 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2791 // Character is outside Latin-1 completely	2798 // Character is outside Latin-1 completely

2792 if (converted == 0) return set_replacement(NULL);	2799 if (converted == 0) return set_replacement(NULL);

2793 // Convert quark to Latin-1 in place.	2800 // Convert quark to Latin-1 in place.

2794 uint16_t* copy = const_cast<uint16_t*>(quarks.start());	2801 uint16_t* copy = const_cast<uint16_t*>(quarks.start());

2795 copy[j] = converted;	2802 copy[j] = converted;

2796 }	2803 }

2797 } else {	2804 } else {

2798 DCHECK(elm.text_type() == TextElement::CHAR_CLASS);	2805 DCHECK(elm.text_type() == TextElement::CHAR_CLASS);

2799 RegExpCharacterClass* cc = elm.char_class();	2806 RegExpCharacterClass* cc = elm.char_class();

2800 ZoneList<CharacterRange>* ranges = cc->ranges(zone());	2807 ZoneList<CharacterRange>* ranges = cc->ranges(zone());

2801 if (!CharacterRange::IsCanonical(ranges)) {	2808 CharacterRange::Canonicalize(ranges);

2802 CharacterRange::Canonicalize(ranges);

2803 }

2804 // Now they are in order so we only need to look at the first.	2809 // Now they are in order so we only need to look at the first.

2805 int range_count = ranges->length();	2810 int range_count = ranges->length();

2806 if (cc->is_negated()) {	2811 if (cc->is_negated()) {

2807 if (range_count != 0 &&	2812 if (range_count != 0 &&

2808 ranges->at(0).from() == 0 &&	2813 ranges->at(0).from() == 0 &&

2809 ranges->at(0).to() >= String::kMaxOneByteCharCode) {	2814 ranges->at(0).to() >= String::kMaxOneByteCharCode) {

2810 // This will be handled in a later filter.	2815 // This will be handled in a later filter.

2811 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;	2816 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

2812 return set_replacement(NULL);	2817 return set_replacement(NULL);

2813 }	2818 }

(...skipping 468 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3282 bool TextNode::SkipPass(int int_pass, bool ignore_case) {	3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) {

3283 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);	3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);

3284 if (ignore_case) {	3289 if (ignore_case) {

3285 return pass == SIMPLE_CHARACTER_MATCH;	3290 return pass == SIMPLE_CHARACTER_MATCH;

3286 } else {	3291 } else {

3287 return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;	3292 return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;

3288 }	3293 }

3289 }	3294 }

3290	3295

3291	3296

	3297 TextNode* TextNode::CreateForCharacterRanges(Zone* zone,

	3298 ZoneList<CharacterRange>* ranges,

	3299 bool read_backward,

	3300 RegExpNode* on_success) {

	3301 DCHECK_NOT_NULL(ranges);

	3302 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);

	3303 elms->Add(

	3304 TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)),

	3305 zone);

	3306 return new (zone) TextNode(elms, read_backward, on_success);

	3307 }

	3308

	3309

	3310 TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,

	3311 CharacterRange trail,

	3312 bool read_backward,

	3313 RegExpNode* on_success) {

	3314 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);

	3315 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);

	3316 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);

	3317 elms->Add(TextElement::CharClass(

	3318 new (zone) RegExpCharacterClass(lead_ranges, false)),

	3319 zone);

	3320 elms->Add(TextElement::CharClass(

	3321 new (zone) RegExpCharacterClass(trail_ranges, false)),

	3322 zone);

	3323 return new (zone) TextNode(elms, read_backward, on_success);

	3324 }

	3325

	3326

3292 // This generates the code to match a text node. A text node can contain	3327 // This generates the code to match a text node. A text node can contain

3293 // straight character sequences (possibly to be matched in a case-independent	3328 // straight character sequences (possibly to be matched in a case-independent

3294 // way) and character classes. For efficiency we do not do this in a single	3329 // way) and character classes. For efficiency we do not do this in a single

3295 // pass from left to right. Instead we pass over the text node several times,	3330 // pass from left to right. Instead we pass over the text node several times,

3296 // emitting code for some character positions every time. See the comment on	3331 // emitting code for some character positions every time. See the comment on

3297 // TextEmitPass for details.	3332 // TextEmitPass for details.

3298 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {	3333 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

3299 LimitResult limit_result = LimitVersions(compiler, trace);	3334 LimitResult limit_result = LimitVersions(compiler, trace);

3300 if (limit_result == DONE) return;	3335 if (limit_result == DONE) return;

3301 DCHECK(limit_result == CONTINUE);	3336 DCHECK(limit_result == CONTINUE);

(...skipping 96 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3398	3433

3399	3434

3400 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(	3435 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(

3401 RegExpCompiler* compiler) {	3436 RegExpCompiler* compiler) {

3402 if (read_backward()) return NULL;	3437 if (read_backward()) return NULL;

3403 if (elements()->length() != 1) return NULL;	3438 if (elements()->length() != 1) return NULL;

3404 TextElement elm = elements()->at(0);	3439 TextElement elm = elements()->at(0);

3405 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;	3440 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;

3406 RegExpCharacterClass* node = elm.char_class();	3441 RegExpCharacterClass* node = elm.char_class();

3407 ZoneList<CharacterRange>* ranges = node->ranges(zone());	3442 ZoneList<CharacterRange>* ranges = node->ranges(zone());

3408 if (!CharacterRange::IsCanonical(ranges)) {	3443 CharacterRange::Canonicalize(ranges);

3409 CharacterRange::Canonicalize(ranges);

3410 }

3411 if (node->is_negated()) {	3444 if (node->is_negated()) {

3412 return ranges->length() == 0 ? on_success() : NULL;	3445 return ranges->length() == 0 ? on_success() : NULL;

3413 }	3446 }

3414 if (ranges->length() != 1) return NULL;	3447 if (ranges->length() != 1) return NULL;

3415 uint32_t max_char;	3448 uint32_t max_char;

3416 if (compiler->one_byte()) {	3449 if (compiler->one_byte()) {

3417 max_char = String::kMaxOneByteCharCode;	3450 max_char = String::kMaxOneByteCharCode;

3418 } else {	3451 } else {

3419 max_char = String::kMaxUtf16CodeUnit;	3452 max_char = String::kMaxUtf16CodeUnit;

3420 }	3453 }

(...skipping 126 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3547 return alt_gens_[i];	3580 return alt_gens_[i];

3548 }	3581 }

3549	3582

3550 private:	3583 private:

3551 static const int kAFew = 10;	3584 static const int kAFew = 10;

3552 ZoneList<AlternativeGeneration*> alt_gens_;	3585 ZoneList<AlternativeGeneration*> alt_gens_;

3553 AlternativeGeneration a_few_alt_gens_[kAFew];	3586 AlternativeGeneration a_few_alt_gens_[kAFew];

3554 };	3587 };

3555	3588

3556	3589

	3590 static const uc32 kLeadSurrogateStart = 0xd800;

	3591 static const uc32 kLeadSurrogateEnd = 0xdbff;

	3592 static const uc32 kTrailSurrogateStart = 0xdc00;

	3593 static const uc32 kTrailSurrogateEnd = 0xdfff;

	3594 static const uc32 kNonBmpStart = 0x10000;

	3595 static const uc32 kNonBmpEnd = 0x10ffff;

	3596 static const uc32 kRangeEndMarker = 0x110000;

	3597

3557 // The '2' variant is has inclusive from and exclusive to.	3598 // The '2' variant is has inclusive from and exclusive to.

3558 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,	3599 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,

3559 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.	3600 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.

3560 static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,	3601 static const int kSpaceRanges[] = {

3561 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,	3602 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681,

3562 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,	3603 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,

3563 0xFEFF, 0xFF00, 0x10000 };	3604 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker};

3564 static const int kSpaceRangeCount = arraysize(kSpaceRanges);	3605 static const int kSpaceRangeCount = arraysize(kSpaceRanges);

3565	3606

3566 static const int kWordRanges[] = {	3607 static const int kWordRanges[] = {

3567 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };	3608 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, kRangeEndMarker};

3568 static const int kWordRangeCount = arraysize(kWordRanges);	3609 static const int kWordRangeCount = arraysize(kWordRanges);

3569 static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };	3610 static const int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};

3570 static const int kDigitRangeCount = arraysize(kDigitRanges);	3611 static const int kDigitRangeCount = arraysize(kDigitRanges);

3571 static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };	3612 static const int kSurrogateRanges[] = {

	3613 kLeadSurrogateStart, kLeadSurrogateStart + 1, kRangeEndMarker};

3572 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);	3614 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);

3573 static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,	3615 static const int kLineTerminatorRanges[] = {

3574 0x2028, 0x202A, 0x10000 };	3616 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker};

3575 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);	3617 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);

3576	3618

3577

3578 void BoyerMoorePositionInfo::Set(int character) {	3619 void BoyerMoorePositionInfo::Set(int character) {

3579 SetInterval(Interval(character, character));	3620 SetInterval(Interval(character, character));

3580 }	3621 }

3581	3622

3582	3623

3583 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {	3624 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {

3584 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);	3625 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);

3585 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);	3626 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);

3586 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);	3627 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);

3587 surrogate_ =	3628 surrogate_ =

(...skipping 1137 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4725 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,	4766 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

4726 RegExpNode* on_success) {	4767 RegExpNode* on_success) {

4727 return new (compiler->zone())	4768 return new (compiler->zone())

4728 TextNode(elements(), compiler->read_backward(), on_success);	4769 TextNode(elements(), compiler->read_backward(), on_success);

4729 }	4770 }

4730	4771

4731	4772

4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,	4773 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,

4733 const int* special_class,	4774 const int* special_class,

4734 int length) {	4775 int length) {

4735 length--; // Remove final 0x10000.	4776 length--; // Remove final marker.

4736 DCHECK(special_class[length] == 0x10000);	4777 DCHECK(special_class[length] == kRangeEndMarker);

4737 DCHECK(ranges->length() != 0);	4778 DCHECK(ranges->length() != 0);

4738 DCHECK(length != 0);	4779 DCHECK(length != 0);

4739 DCHECK(special_class[0] != 0);	4780 DCHECK(special_class[0] != 0);

4740 if (ranges->length() != (length >> 1) + 1) {	4781 if (ranges->length() != (length >> 1) + 1) {

4741 return false;	4782 return false;

4742 }	4783 }

4743 CharacterRange range = ranges->at(0);	4784 CharacterRange range = ranges->at(0);

4744 if (range.from() != 0) {	4785 if (range.from() != 0) {

4745 return false;	4786 return false;

4746 }	4787 }

4747 for (int i = 0; i < length; i += 2) {	4788 for (int i = 0; i < length; i += 2) {

4748 if (special_class[i] != (range.to() + 1)) {	4789 if (special_class[i] != (range.to() + 1)) {

4749 return false;	4790 return false;

4750 }	4791 }

4751 range = ranges->at((i >> 1) + 1);	4792 range = ranges->at((i >> 1) + 1);

4752 if (special_class[i+1] != range.from()) {	4793 if (special_class[i+1] != range.from()) {

4753 return false;	4794 return false;

4754 }	4795 }

4755 }	4796 }

4756 if (range.to() != 0xffff) {	4797 if (range.to() != 0xffff) {

4757 return false;	4798 return false;

4758 }	4799 }

4759 return true;	4800 return true;

4760 }	4801 }

4761	4802

4762	4803

4763 static bool CompareRanges(ZoneList<CharacterRange>* ranges,	4804 static bool CompareRanges(ZoneList<CharacterRange>* ranges,

4764 const int* special_class,	4805 const int* special_class,

4765 int length) {	4806 int length) {

4766 length--; // Remove final 0x10000.	4807 length--; // Remove final marker.

4767 DCHECK(special_class[length] == 0x10000);	4808 DCHECK(special_class[length] == kRangeEndMarker);

4768 if (ranges->length() * 2 != length) {	4809 if (ranges->length() * 2 != length) {

4769 return false;	4810 return false;

4770 }	4811 }

4771 for (int i = 0; i < length; i += 2) {	4812 for (int i = 0; i < length; i += 2) {

4772 CharacterRange range = ranges->at(i >> 1);	4813 CharacterRange range = ranges->at(i >> 1);

4773 if (range.from() != special_class[i] \|\|	4814 if (range.from() != special_class[i] \|\|

4774 range.to() != special_class[i + 1] - 1) {	4815 range.to() != special_class[i + 1] - 1) {

4775 return false;	4816 return false;

4776 }	4817 }

4777 }	4818 }

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4813 return true;	4854 return true;

4814 }	4855 }

4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {	4856 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {

4816 set_.set_standard_set_type('W');	4857 set_.set_standard_set_type('W');

4817 return true;	4858 return true;

4818 }	4859 }

4819 return false;	4860 return false;

4820 }	4861 }

4821	4862

4822	4863

	4864 bool RegExpCharacterClass::NeedsDesugaringForUnicode(Zone* zone) {

	4865 ZoneList<CharacterRange>* ranges = this->ranges(zone);

	4866 CharacterRange::Canonicalize(ranges);

	4867 for (int i = ranges->length() - 1; i >= 0; i--) {

	4868 uc32 from = ranges->at(i).from();

	4869 uc32 to = ranges->at(i).to();

	4870 // Check for non-BMP characters.

	4871 if (to >= kNonBmpStart) return true;

	4872 // Check for lone surrogates.

	4873 if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;

	4874 }

	4875 return false;

	4876 }

	4877

	4878

	4879 UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,

	4880 ZoneList<CharacterRange>* base)

	4881 : zone_(zone),

	4882 table_(zone),

	4883 bmp_(nullptr),

	4884 lead_surrogates_(nullptr),

	4885 trail_surrogates_(nullptr),

	4886 non_bmp_(nullptr) {

	4887 // The unicode range splitter categorizes given character ranges into:

	4888 // - Code points from the BMP representable by one code unit.

	4889 // - Code points outside the BMP that need to be split into surrogate pairs.

	4890 // - Lone lead surrogates.

	4891 // - Lone trail surrogates.

	4892 // Lone surrogates are valid code points, even though no actual characters.

	4893 // They require special matching to make sure we do not split surrogate pairs.

	4894 // We use the dispatch table to accomplish this. The base range is split up

	4895 // by the table by the overlay ranges, and the Call callback is used to

	4896 // filter and collect ranges for each category.

	4897 for (int i = 0; i < base->length(); i++) {

	4898 table_.AddRange(base->at(i), kBase, zone_);

	4899 }

	4900 // Add overlay ranges.

	4901 table_.AddRange(CharacterRange(0, kLeadSurrogateStart - 1), kBmpCodePoints,

	4902 zone_);

	4903 table_.AddRange(CharacterRange(kLeadSurrogateStart, kLeadSurrogateEnd),

	4904 kLeadSurrogates, zone_);

	4905 table_.AddRange(CharacterRange(kTrailSurrogateStart, kTrailSurrogateEnd),

	4906 kTrailSurrogates, zone_);

	4907 table_.AddRange(CharacterRange(kTrailSurrogateEnd, kNonBmpStart - 1),

	4908 kBmpCodePoints, zone_);

	4909 table_.AddRange(CharacterRange(kNonBmpStart, kNonBmpEnd), kNonBmpCodePoints,

	4910 zone_);

	4911 table_.ForEach(this);

	4912 }

	4913

	4914

	4915 void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {

	4916 OutSet* outset = entry.out_set();

	4917 if (!outset->Get(kBase)) return;

	4918 ZoneList<CharacterRange>** target = NULL;

	4919 if (outset->Get(kBmpCodePoints)) {

	4920 target = &bmp_;

	4921 } else if (outset->Get(kLeadSurrogates)) {

	4922 target = &lead_surrogates_;

	4923 } else if (outset->Get(kTrailSurrogates)) {

	4924 target = &trail_surrogates_;

	4925 } else {

	4926 DCHECK(outset->Get(kNonBmpCodePoints));

	4927 target = &non_bmp_;

	4928 }

	4929 if (target == NULL) target = new (zone_) ZoneList<CharacterRange>(2, zone_);

	4930 (*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);

	4931 }

	4932

	4933

	4934 void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,

	4935 RegExpNode* on_success, UnicodeRangeSplitter* splitter) {

	4936 ZoneList<CharacterRange>* bmp = splitter->bmp();

	4937 if (bmp == nullptr) return;

	4938 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(

	4939 compiler->zone(), bmp, compiler->read_backward(), on_success)));

	4940 }

	4941

	4942

	4943 void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,

	4944 RegExpNode* on_success,

	4945 UnicodeRangeSplitter* splitter) {

	4946 ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();

	4947 if (non_bmp == nullptr) return;

	4948 DCHECK(compiler->unicode());

	4949 DCHECK(!compiler->one_byte());

	4950 Zone* zone = compiler->zone();

	4951 CharacterRange::Canonicalize(non_bmp);

	4952 for (int i = 0; i < non_bmp->length(); i++) {

	4953 // Match surrogate pair.

	4954 // E.g. [\u10005-\u11005] becomes

	4955 // \ud800[\udc05-\udfff]\|

	4956 // [\ud801-\ud803][\udc00-\udfff]\|

	4957 // \ud804[\udc00-\udc05]

	4958 uc32 from = non_bmp->at(i).from();

	4959 uc32 to = non_bmp->at(i).to();

	4960 uc16 from_l = unibrow::Utf16::LeadSurrogate(from);

	4961 uc16 from_t = unibrow::Utf16::TrailSurrogate(from);

	4962 uc16 to_l = unibrow::Utf16::LeadSurrogate(to);

	4963 uc16 to_t = unibrow::Utf16::TrailSurrogate(to);

	4964 if (from_l == to_l) {

	4965 // The lead surrogate is the same.

	4966 result->AddAlternative(

	4967 GuardedAlternative(TextNode::CreateForSurrogatePair(

	4968 zone, CharacterRange::Singleton(from_l),

	4969 CharacterRange::Range(from_t, to_t), compiler->read_backward(),

	4970 on_success)));

	4971 } else {

	4972 if (from_t != kTrailSurrogateStart) {

	4973 // Add [from_l][from_t-\udfff]

	4974 result->AddAlternative(

	4975 GuardedAlternative(TextNode::CreateForSurrogatePair(

	4976 zone, CharacterRange::Singleton(from_l),

	4977 CharacterRange::Range(from_t, kTrailSurrogateEnd),

	4978 compiler->read_backward(), on_success)));

	4979 from_l++;

	4980 }

	4981 if (to_t != kTrailSurrogateEnd) {

	4982 // Add [to_l][\udc00-to_t]

	4983 result->AddAlternative(

	4984 GuardedAlternative(TextNode::CreateForSurrogatePair(

	4985 zone, CharacterRange::Singleton(to_l),

	4986 CharacterRange::Range(kTrailSurrogateStart, to_t),

	4987 compiler->read_backward(), on_success)));

	4988 to_l--;

	4989 }

	4990 if (from_l <= to_l) {

	4991 // Add [from_l-to_l][\udc00-\udfff]

	4992 result->AddAlternative(

	4993 GuardedAlternative(TextNode::CreateForSurrogatePair(

	4994 zone, CharacterRange::Range(from_l, to_l),

	4995 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),

	4996 compiler->read_backward(), on_success)));

	4997 }

	4998 }

	4999 }

	5000 }

	5001

	5002

	5003 RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(

	5004 RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,

	5005 ZoneList<CharacterRange>* match, RegExpNode* on_success,

	5006 bool read_backward) {

	5007 Zone* zone = compiler->zone();

	5008 RegExpNode* match_node = TextNode::CreateForCharacterRanges(

	5009 zone, match, read_backward, on_success);

	5010 int stack_register = compiler->UnicodeLookaroundStackRegister();

	5011 int position_register = compiler->UnicodeLookaroundPositionRegister();

	5012 RegExpLookaround::Builder lookaround(false, match_node, stack_register,

	5013 position_register);

	5014 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(

	5015 zone, lookbehind, !read_backward, lookaround.on_match_success());

	5016 return lookaround.ForMatch(negative_match);

	5017 }

	5018

	5019

	5020 RegExpNode* MatchAndNegativeLookaroundInReadDirection(

	5021 RegExpCompiler* compiler, ZoneList<CharacterRange>* match,

	5022 ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,

	5023 bool read_backward) {

	5024 Zone* zone = compiler->zone();

	5025 int stack_register = compiler->UnicodeLookaroundStackRegister();

	5026 int position_register = compiler->UnicodeLookaroundPositionRegister();

	5027 RegExpLookaround::Builder lookaround(false, on_success, stack_register,

	5028 position_register);

	5029 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(

	5030 zone, lookahead, read_backward, lookaround.on_match_success());

	5031 return TextNode::CreateForCharacterRanges(

	5032 zone, match, read_backward, lookaround.ForMatch(negative_match));

	5033 }

	5034

	5035

	5036 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,

	5037 RegExpNode* on_success,

	5038 UnicodeRangeSplitter* splitter) {

	5039 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();

	5040 if (lead_surrogates == nullptr) return;

	5041 Zone* zone = compiler->zone();

	5042 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).

	5043 ZoneList<CharacterRange>* trail_surrogates =

	5044 new (zone) ZoneList<CharacterRange>(1, zone);

	5045 trail_surrogates->Add(

	5046 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone);

	5047

	5048 RegExpNode* match =

	5049 compiler->read_backward()

	5050 // Reading backward. Assert that reading forward, there is no trail

	5051 // surrogate, and then backward match the lead surrogate.

	5052 ? NegativeLookaroundAgainstReadDirectionAndMatch(

	5053 compiler, trail_surrogates, lead_surrogates, on_success, true)

	5054 // Reading forward. Forwrad match the lead surrogate and assert that

	5055 // no

	5056 // trail surrogate follows.

	5057 : MatchAndNegativeLookaroundInReadDirection(

	5058 compiler, lead_surrogates, trail_surrogates, on_success, false);

	5059 result->AddAlternative(GuardedAlternative(match));

	5060 }

	5061

	5062

	5063 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,

	5064 RegExpNode* on_success,

	5065 UnicodeRangeSplitter* splitter) {

	5066 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();

	5067 if (trail_surrogates == nullptr) return;

	5068 Zone* zone = compiler->zone();

	5069 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01

	5070 ZoneList<CharacterRange>* lead_surrogates =

	5071 new (zone) ZoneList<CharacterRange>(1, zone);

	5072 lead_surrogates->Add(

	5073 CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone);

	5074

	5075 RegExpNode* match =

	5076 compiler->read_backward()

	5077 // Reading backward. Backward match the trail surrogate and assert

	5078 // that no lead surrogate precedes it.

	5079 ? MatchAndNegativeLookaroundInReadDirection(

	5080 compiler, trail_surrogates, lead_surrogates, on_success, true)

	5081 // Reading forward. Assert that reading backward, there is no lead

	5082 // surrogate, and then forward match the trail surrogate.

	5083 : NegativeLookaroundAgainstReadDirectionAndMatch(

	5084 compiler, lead_surrogates, trail_surrogates, on_success, false);

	5085 result->AddAlternative(GuardedAlternative(match));

	5086 }

	5087

	5088

4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,	5089 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

4824 RegExpNode* on_success) {	5090 RegExpNode* on_success) {

4825 return new (compiler->zone())	5091 set_.Canonicalize();

4826 TextNode(this, compiler->read_backward(), on_success);	5092 Zone* zone = compiler->zone();

4827 }	5093 ZoneList<CharacterRange>* ranges = this->ranges(zone);

4828	5094 if (compiler->unicode() && !compiler->one_byte()) {

4829	5095 if (is_negated()) {

	5096 ZoneList<CharacterRange>* negated =

	5097 new (zone) ZoneList<CharacterRange>(2, zone);

	5098 CharacterRange::Negate(ranges, negated, zone);

	5099 ranges = negated;

	5100 }

	5101 if (ranges->length() == 0) {

	5102 // No matches possible.

	5103 return new (zone) EndNode(EndNode::BACKTRACK, zone);

	5104 }

	5105 UnicodeRangeSplitter splitter(zone, ranges);

	5106 ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone());

	5107 AddBmpCharacters(compiler, result, on_success, &splitter);

	5108 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);

	5109 AddLoneLeadSurrogates(compiler, result, on_success, &splitter);

	5110 AddLoneTrailSurrogates(compiler, result, on_success, &splitter);

	5111 return result;

	5112 } else {

	5113 return new (zone) TextNode(this, compiler->read_backward(), on_success);

	5114 }

	5115 }

	5116

	5117

4830 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {	5118 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {

4831 RegExpAtom* atom1 = (*a)->AsAtom();	5119 RegExpAtom* atom1 = (*a)->AsAtom();

4832 RegExpAtom* atom2 = (*b)->AsAtom();	5120 RegExpAtom* atom2 = (*b)->AsAtom();

4833 uc16 character1 = atom1->data().at(0);	5121 uc16 character1 = atom1->data().at(0);

4834 uc16 character2 = atom2->data().at(0);	5122 uc16 character2 = atom2->data().at(0);

4835 if (character1 < character2) return -1;	5123 if (character1 < character2) return -1;

4836 if (character1 > character2) return 1;	5124 if (character1 > character2) return 1;

4837 return 0;	5125 return 0;

4838 }	5126 }

4839	5127

(...skipping 491 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5331 compiler->read_backward(), on_success);	5619 compiler->read_backward(), on_success);

5332 }	5620 }

5333	5621

5334	5622

5335 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,	5623 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,

5336 RegExpNode* on_success) {	5624 RegExpNode* on_success) {

5337 return on_success;	5625 return on_success;

5338 }	5626 }

5339	5627

5340	5628

	5629 RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,

	5630 int stack_pointer_register,

	5631 int position_register,

	5632 int capture_register_count,

	5633 int capture_register_start)

	5634 : is_positive_(is_positive),

	5635 on_success_(on_success),

	5636 stack_pointer_register_(stack_pointer_register),

	5637 position_register_(position_register) {

	5638 if (is_positive_) {

	5639 on_match_success_ = ActionNode::PositiveSubmatchSuccess(

	5640 stack_pointer_register, position_register, capture_register_count,

	5641 capture_register_start, on_success_);

	5642 } else {

	5643 Zone* zone = on_success_->zone();

	5644 on_match_success_ = new (zone) NegativeSubmatchSuccess(

	5645 stack_pointer_register, position_register, capture_register_count,

	5646 capture_register_start, zone);

	5647 }

	5648 }

	5649

	5650

	5651 RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {

	5652 if (is_positive_) {

	5653 return ActionNode::BeginSubmatch(stack_pointer_register_,

	5654 position_register_, match);

	5655 } else {

	5656 Zone* zone = on_success_->zone();

	5657 // We use a ChoiceNode to represent the negative lookaround. The first

	5658 // alternative is the negative match. On success, the end node backtracks.

	5659 // On failure, the second alternative is tried and leads to success.

	5660 // NegativeLookaheadChoiceNode is a special ChoiceNode that ignores the

	5661 // first exit when calculating quick checks.

	5662 ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(

	5663 GuardedAlternative(match), GuardedAlternative(on_success_), zone);

	5664 return ActionNode::BeginSubmatch(stack_pointer_register_,

	5665 position_register_, choice_node);

	5666 }

	5667 }

	5668

	5669

5341 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,	5670 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,

5342 RegExpNode* on_success) {	5671 RegExpNode* on_success) {

5343 int stack_pointer_register = compiler->AllocateRegister();	5672 int stack_pointer_register = compiler->AllocateRegister();

5344 int position_register = compiler->AllocateRegister();	5673 int position_register = compiler->AllocateRegister();

5345	5674

5346 const int registers_per_capture = 2;	5675 const int registers_per_capture = 2;

5347 const int register_of_first_capture = 2;	5676 const int register_of_first_capture = 2;

5348 int register_count = capture_count_ * registers_per_capture;	5677 int register_count = capture_count_ * registers_per_capture;

5349 int register_start =	5678 int register_start =

5350 register_of_first_capture + capture_from_ * registers_per_capture;	5679 register_of_first_capture + capture_from_ * registers_per_capture;

5351	5680

5352 RegExpNode* result;	5681 RegExpNode* result;

5353 bool was_reading_backward = compiler->read_backward();	5682 bool was_reading_backward = compiler->read_backward();

5354 compiler->set_read_backward(type() == LOOKBEHIND);	5683 compiler->set_read_backward(type() == LOOKBEHIND);

5355 if (is_positive()) {	5684 Builder builder(is_positive(), on_success, stack_pointer_register,

5356 result = ActionNode::BeginSubmatch(	5685 position_register, register_count, register_start);

5357 stack_pointer_register, position_register,	5686 RegExpNode* match = body_->ToNode(compiler, builder.on_match_success());

5358 body()->ToNode(compiler,	5687 result = builder.ForMatch(match);

5359 ActionNode::PositiveSubmatchSuccess(

5360 stack_pointer_register, position_register,

5361 register_count, register_start, on_success)));

5362 } else {

5363 // We use a ChoiceNode for a negative lookahead because it has most of

5364 // the characteristics we need. It has the body of the lookahead as its

5365 // first alternative and the expression after the lookahead of the second

5366 // alternative. If the first alternative succeeds then the

5367 // NegativeSubmatchSuccess will unwind the stack including everything the

5368 // choice node set up and backtrack. If the first alternative fails then

5369 // the second alternative is tried, which is exactly the desired result

5370 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special

5371 // ChoiceNode that knows to ignore the first exit when calculating quick

5372 // checks.

5373 Zone* zone = compiler->zone();

5374

5375 GuardedAlternative body_alt(

5376 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(

5377 stack_pointer_register, position_register,

5378 register_count, register_start, zone)));

5379 ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(

5380 body_alt, GuardedAlternative(on_success), zone);

5381 result = ActionNode::BeginSubmatch(stack_pointer_register,

5382 position_register, choice_node);

5383 }

5384 compiler->set_read_backward(was_reading_backward);	5688 compiler->set_read_backward(was_reading_backward);

5385 return result;	5689 return result;

5386 }	5690 }

5387	5691

5388	5692

5389 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,	5693 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,

5390 RegExpNode* on_success) {	5694 RegExpNode* on_success) {

5391 return ToNode(body(), index(), compiler, on_success);	5695 return ToNode(body(), index(), compiler, on_success);

5392 }	5696 }

5393	5697

(...skipping 27 matching lines...) Expand all Loading...
5421 }	5725 }

5422 return current;	5726 return current;

5423 }	5727 }

5424	5728

5425	5729

5426 static void AddClass(const int* elmv,	5730 static void AddClass(const int* elmv,

5427 int elmc,	5731 int elmc,

5428 ZoneList<CharacterRange>* ranges,	5732 ZoneList<CharacterRange>* ranges,

5429 Zone* zone) {	5733 Zone* zone) {

5430 elmc--;	5734 elmc--;

5431 DCHECK(elmv[elmc] == 0x10000);	5735 DCHECK(elmv[elmc] == kRangeEndMarker);

5432 for (int i = 0; i < elmc; i += 2) {	5736 for (int i = 0; i < elmc; i += 2) {

5433 DCHECK(elmv[i] < elmv[i + 1]);	5737 DCHECK(elmv[i] < elmv[i + 1]);

5434 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);	5738 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone);

5435 }	5739 }

5436 }	5740 }

5437	5741

5438	5742

5439 static void AddClassNegated(const int *elmv,	5743 static void AddClassNegated(const int *elmv,

5440 int elmc,	5744 int elmc,

5441 ZoneList<CharacterRange>* ranges,	5745 ZoneList<CharacterRange>* ranges,

5442 Zone* zone) {	5746 Zone* zone) {

5443 elmc--;	5747 elmc--;

5444 DCHECK(elmv[elmc] == 0x10000);	5748 DCHECK(elmv[elmc] == kRangeEndMarker);

5445 DCHECK(elmv[0] != 0x0000);	5749 DCHECK(elmv[0] != 0x0000);

5446 DCHECK(elmv[elmc-1] != String::kMaxUtf16CodeUnit);	5750 DCHECK(elmv[elmc - 1] != String::kMaxCodePoint);

5447 uc16 last = 0x0000;	5751 uc16 last = 0x0000;

5448 for (int i = 0; i < elmc; i += 2) {	5752 for (int i = 0; i < elmc; i += 2) {

5449 DCHECK(last <= elmv[i] - 1);	5753 DCHECK(last <= elmv[i] - 1);

5450 DCHECK(elmv[i] < elmv[i + 1]);	5754 DCHECK(elmv[i] < elmv[i + 1]);

5451 ranges->Add(CharacterRange(last, elmv[i] - 1), zone);	5755 ranges->Add(CharacterRange(last, elmv[i] - 1), zone);

5452 last = elmv[i + 1];	5756 last = elmv[i + 1];

5453 }	5757 }

5454 ranges->Add(CharacterRange(last, String::kMaxUtf16CodeUnit), zone);	5758 ranges->Add(CharacterRange(last, String::kMaxCodePoint), zone);

5455 }	5759 }

5456	5760

5457	5761

5458 void CharacterRange::AddClassEscape(uc16 type,	5762 void CharacterRange::AddClassEscape(uc16 type,

5459 ZoneList<CharacterRange>* ranges,	5763 ZoneList<CharacterRange>* ranges,

5460 Zone* zone) {	5764 Zone* zone) {

5461 switch (type) {	5765 switch (type) {

5462 case 's':	5766 case 's':

5463 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);	5767 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);

5464 break;	5768 break;

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5501 UNREACHABLE();	5805 UNREACHABLE();

5502 }	5806 }

5503 }	5807 }

5504	5808

5505	5809

5506 Vector<const int> CharacterRange::GetWordBounds() {	5810 Vector<const int> CharacterRange::GetWordBounds() {

5507 return Vector<const int>(kWordRanges, kWordRangeCount - 1);	5811 return Vector<const int>(kWordRanges, kWordRangeCount - 1);

5508 }	5812 }

5509	5813

5510	5814

5511 class CharacterRangeSplitter {

5512 public:

5513 CharacterRangeSplitter(ZoneList<CharacterRange>** included,

5514 ZoneList<CharacterRange>** excluded,

5515 Zone* zone)

5516 : included_(included),

5517 excluded_(excluded),

5518 zone_(zone) { }

5519 void Call(uc16 from, DispatchTable::Entry entry);

5520

5521 static const int kInBase = 0;

5522 static const int kInOverlay = 1;

5523

5524 private:

5525 ZoneList<CharacterRange>** included_;

5526 ZoneList<CharacterRange>** excluded_;

5527 Zone* zone_;

5528 };

5529

5530

5531 void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {

5532 if (!entry.out_set()->Get(kInBase)) return;

5533 ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)

5534 ? included_

5535 : excluded_;

5536 if (target == NULL) target = new(zone_) ZoneList<CharacterRange>(2, zone_);

5537 (*target)->Add(CharacterRange(entry.from(), entry.to()), zone_);

5538 }

5539

5540

5541 void CharacterRange::Split(ZoneList<CharacterRange>* base,

5542 Vector<const int> overlay,

5543 ZoneList<CharacterRange>** included,

5544 ZoneList<CharacterRange>** excluded,

5545 Zone* zone) {

5546 DCHECK_NULL(*included);

5547 DCHECK_NULL(*excluded);

5548 DispatchTable table(zone);

5549 for (int i = 0; i < base->length(); i++)

5550 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase, zone);

5551 for (int i = 0; i < overlay.length(); i += 2) {

5552 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),

5553 CharacterRangeSplitter::kInOverlay, zone);

5554 }

5555 CharacterRangeSplitter callback(included, excluded, zone);

5556 table.ForEach(&callback);

5557 }

5558

5559

5560 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,	5815 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,

5561 ZoneList<CharacterRange>* ranges,	5816 ZoneList<CharacterRange>* ranges,

5562 bool is_one_byte) {	5817 bool is_one_byte) {

5563 uc16 bottom = from();	5818 uc32 bottom = from();

5564 uc16 top = to();	5819 uc32 top = to();

	5820 // Nothing to be done for surrogates.

	5821 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;

5565 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {	5822 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {

5566 if (bottom > String::kMaxOneByteCharCode) return;	5823 if (bottom > String::kMaxOneByteCharCode) return;

5567 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;	5824 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;

5568 }	5825 }

5569 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5826 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5570 if (top == bottom) {	5827 if (top == bottom) {

5571 // If this is a singleton we just expand the one character.	5828 // If this is a singleton we just expand the one character.

5572 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);	5829 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);

5573 for (int i = 0; i < length; i++) {	5830 for (int i = 0; i < length; i++) {

5574 uc32 chr = chars[i];	5831 uc32 chr = chars[i];

(...skipping 17 matching lines...) Expand all Loading...
5592 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only	5849 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only

5593 // add a range if it is not already contained in the input, so [c-f]	5850 // add a range if it is not already contained in the input, so [c-f]

5594 // will be skipped but [C-F] will be added. If this range is not	5851 // will be skipped but [C-F] will be added. If this range is not

5595 // completely contained in a block we do this for all the blocks	5852 // completely contained in a block we do this for all the blocks

5596 // covered by the range (handling characters that is not in a block	5853 // covered by the range (handling characters that is not in a block

5597 // as a "singleton block").	5854 // as a "singleton block").

5598 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5855 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5599 int pos = bottom;	5856 int pos = bottom;

5600 while (pos <= top) {	5857 while (pos <= top) {

5601 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);	5858 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);

5602 uc16 block_end;	5859 uc32 block_end;

5603 if (length == 0) {	5860 if (length == 0) {

5604 block_end = pos;	5861 block_end = pos;

5605 } else {	5862 } else {

5606 DCHECK_EQ(1, length);	5863 DCHECK_EQ(1, length);

5607 block_end = range[0];	5864 block_end = range[0];

5608 }	5865 }

5609 int end = (block_end > top) ? top : block_end;	5866 int end = (block_end > top) ? top : block_end;

5610 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);	5867 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);

5611 for (int i = 0; i < length; i++) {	5868 for (int i = 0; i < length; i++) {

5612 uc32 c = range[i];	5869 uc32 c = range[i];

5613 uc16 range_from = c - (block_end - pos);	5870 uc32 range_from = c - (block_end - pos);

5614 uc16 range_to = c - (block_end - end);	5871 uc32 range_to = c - (block_end - end);

5615 if (!(bottom <= range_from && range_to <= top)) {	5872 if (!(bottom <= range_from && range_to <= top)) {

5616 ranges->Add(CharacterRange(range_from, range_to), zone);	5873 ranges->Add(CharacterRange(range_from, range_to), zone);

5617 }	5874 }

5618 }	5875 }

5619 pos = end + 1;	5876 pos = end + 1;

5620 }	5877 }

5621 }	5878 }

5622 }	5879 }

5623	5880

5624	5881

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5665	5922

5666	5923

5667 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,	5924 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,

5668 int count,	5925 int count,

5669 CharacterRange insert) {	5926 CharacterRange insert) {

5670 // Inserts a range into list[0..count[, which must be sorted	5927 // Inserts a range into list[0..count[, which must be sorted

5671 // by from value and non-overlapping and non-adjacent, using at most	5928 // by from value and non-overlapping and non-adjacent, using at most

5672 // list[0..count] for the result. Returns the number of resulting	5929 // list[0..count] for the result. Returns the number of resulting

5673 // canonicalized ranges. Inserting a range may collapse existing ranges into	5930 // canonicalized ranges. Inserting a range may collapse existing ranges into

5674 // fewer ranges, so the return value can be anything in the range 1..count+1.	5931 // fewer ranges, so the return value can be anything in the range 1..count+1.

5675 uc16 from = insert.from();	5932 uc32 from = insert.from();

5676 uc16 to = insert.to();	5933 uc32 to = insert.to();

5677 int start_pos = 0;	5934 int start_pos = 0;

5678 int end_pos = count;	5935 int end_pos = count;

5679 for (int i = count - 1; i >= 0; i--) {	5936 for (int i = count - 1; i >= 0; i--) {

5680 CharacterRange current = list->at(i);	5937 CharacterRange current = list->at(i);

5681 if (current.from() > to + 1) {	5938 if (current.from() > to + 1) {

5682 end_pos = i;	5939 end_pos = i;

5683 } else if (current.to() + 1 < from) {	5940 } else if (current.to() + 1 < from) {

5684 start_pos = i + 1;	5941 start_pos = i + 1;

5685 break;	5942 break;

5686 }	5943 }

(...skipping 79 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5766 DCHECK(CharacterRange::IsCanonical(character_ranges));	6023 DCHECK(CharacterRange::IsCanonical(character_ranges));

5767 }	6024 }

5768	6025

5769	6026

5770 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,	6027 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,

5771 ZoneList<CharacterRange>* negated_ranges,	6028 ZoneList<CharacterRange>* negated_ranges,

5772 Zone* zone) {	6029 Zone* zone) {

5773 DCHECK(CharacterRange::IsCanonical(ranges));	6030 DCHECK(CharacterRange::IsCanonical(ranges));

5774 DCHECK_EQ(0, negated_ranges->length());	6031 DCHECK_EQ(0, negated_ranges->length());

5775 int range_count = ranges->length();	6032 int range_count = ranges->length();

5776 uc16 from = 0;	6033 uc32 from = 0;

5777 int i = 0;	6034 int i = 0;

5778 if (range_count > 0 && ranges->at(0).from() == 0) {	6035 if (range_count > 0 && ranges->at(0).from() == 0) {

5779 from = ranges->at(0).to();	6036 from = ranges->at(0).to();

5780 i = 1;	6037 i = 1;

5781 }	6038 }

5782 while (i < range_count) {	6039 while (i < range_count) {

5783 CharacterRange range = ranges->at(i);	6040 CharacterRange range = ranges->at(i);

5784 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);	6041 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone);

5785 from = range.to();	6042 from = range.to();

5786 i++;	6043 i++;

5787 }	6044 }

5788 if (from < String::kMaxUtf16CodeUnit) {	6045 if (from < String::kMaxCodePoint) {

5789 negated_ranges->Add(CharacterRange(from + 1, String::kMaxUtf16CodeUnit),	6046 negated_ranges->Add(CharacterRange(from + 1, String::kMaxCodePoint), zone);

5790 zone);

5791 }	6047 }

5792 }	6048 }

5793	6049

5794	6050

5795 // -------------------------------------------------------------------	6051 // -------------------------------------------------------------------

5796 // Splay tree	6052 // Splay tree

5797	6053

5798	6054

5799 OutSet* OutSet::Extend(unsigned value, Zone* zone) {	6055 OutSet* OutSet::Extend(unsigned value, Zone* zone) {

5800 if (Get(value))	6056 if (Get(value))

(...skipping 30 matching lines...) Expand all Loading...
5831 if (value < kFirstLimit) {	6087 if (value < kFirstLimit) {

5832 return (first_ & (1 << value)) != 0;	6088 return (first_ & (1 << value)) != 0;

5833 } else if (remaining_ == NULL) {	6089 } else if (remaining_ == NULL) {

5834 return false;	6090 return false;

5835 } else {	6091 } else {

5836 return remaining_->Contains(value);	6092 return remaining_->Contains(value);

5837 }	6093 }

5838 }	6094 }

5839	6095

5840	6096

5841 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;	6097 const uc32 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;

5842	6098

5843	6099

5844 void DispatchTable::AddRange(CharacterRange full_range, int value,	6100 void DispatchTable::AddRange(CharacterRange full_range, int value,

5845 Zone* zone) {	6101 Zone* zone) {

5846 CharacterRange current = full_range;	6102 CharacterRange current = full_range;

5847 if (tree()->is_empty()) {	6103 if (tree()->is_empty()) {

5848 // If this is the first range we just insert into the table.	6104 // If this is the first range we just insert into the table.

5849 ZoneSplayTree<Config>::Locator loc;	6105 ZoneSplayTree<Config>::Locator loc;

5850 bool inserted = tree()->Insert(current.from(), &loc);	6106 bool inserted = tree()->Insert(current.from(), &loc);

5851 DCHECK(inserted);	6107 DCHECK(inserted);

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5933 USE(inserted);	6189 USE(inserted);

5934 ins.set_value(Entry(current.from(),	6190 ins.set_value(Entry(current.from(),

5935 current.to(),	6191 current.to(),

5936 empty()->Extend(value, zone)));	6192 empty()->Extend(value, zone)));

5937 break;	6193 break;

5938 }	6194 }

5939 }	6195 }

5940 }	6196 }

5941	6197

5942	6198

5943 OutSet* DispatchTable::Get(uc16 value) {	6199 OutSet* DispatchTable::Get(uc32 value) {

5944 ZoneSplayTree<Config>::Locator loc;	6200 ZoneSplayTree<Config>::Locator loc;

5945 if (!tree()->FindGreatestLessThan(value, &loc))	6201 if (!tree()->FindGreatestLessThan(value, &loc))

5946 return empty();	6202 return empty();

5947 Entry* entry = &loc.value();	6203 Entry* entry = &loc.value();

5948 if (value <= entry->to())	6204 if (value <= entry->to())

5949 return entry->out_set();	6205 return entry->out_set();

5950 else	6206 else

5951 return empty();	6207 return empty();

5952 }	6208 }

5953	6209

(...skipping 297 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6251 }	6507 }

6252	6508

6253	6509

6254 void DispatchTableConstructor::VisitAction(ActionNode* that) {	6510 void DispatchTableConstructor::VisitAction(ActionNode* that) {

6255 RegExpNode* target = that->on_success();	6511 RegExpNode* target = that->on_success();

6256 target->Accept(this);	6512 target->Accept(this);

6257 }	6513 }

6258	6514

6259	6515

6260 RegExpEngine::CompilationResult RegExpEngine::Compile(	6516 RegExpEngine::CompilationResult RegExpEngine::Compile(

6261 Isolate* isolate, Zone* zone, RegExpCompileData* data, bool ignore_case,	6517 Isolate* isolate, Zone* zone, RegExpCompileData* data,

6262 bool is_global, bool is_multiline, bool is_sticky, Handle<String> pattern,	6518 JSRegExp::Flags flags, Handle<String> pattern,

6263 Handle<String> sample_subject, bool is_one_byte) {	6519 Handle<String> sample_subject, bool is_one_byte) {

6264 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {	6520 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {

6265 return IrregexpRegExpTooBig(isolate);	6521 return IrregexpRegExpTooBig(isolate);

6266 }	6522 }

6267 RegExpCompiler compiler(isolate, zone, data->capture_count, ignore_case,	6523 bool ignore_case = flags & JSRegExp::kIgnoreCase;

	6524 bool is_sticky = flags & JSRegExp::kSticky;

	6525 bool is_global = flags & JSRegExp::kGlobal;

	6526 RegExpCompiler compiler(isolate, zone, data->capture_count, flags,

6268 is_one_byte);	6527 is_one_byte);

6269	6528

6270 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));	6529 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));

6271	6530

6272 // Sample some characters from the middle of the string.	6531 // Sample some characters from the middle of the string.

6273 static const int kSampleSize = 128;	6532 static const int kSampleSize = 128;

6274	6533

6275 sample_subject = String::Flatten(sample_subject);	6534 sample_subject = String::Flatten(sample_subject);

6276 int chars_sampled = 0;	6535 int chars_sampled = 0;

6277 int half_way = (sample_subject->length() - kSampleSize) / 2;	6536 int half_way = (sample_subject->length() - kSampleSize) / 2;

(...skipping 222 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6500	6759

6501	6760

6502 void RegExpResultsCache::Clear(FixedArray* cache) {	6761 void RegExpResultsCache::Clear(FixedArray* cache) {

6503 for (int i = 0; i < kRegExpResultsCacheSize; i++) {	6762 for (int i = 0; i < kRegExpResultsCacheSize; i++) {

6504 cache->set(i, Smi::FromInt(0));	6763 cache->set(i, Smi::FromInt(0));

6505 }	6764 }

6506 }	6765 }

6507	6766

6508 } // namespace internal	6767 } // namespace internal

6509 } // namespace v8	6768 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-ast.h » ('j') | no next file with comments »