Chromium Code Reviews| Index: src/jsregexp.cc |
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc |
| index d44b5afceaa9ae46066f7da216b5be3dd526d3a2..ee2e54156437a01a67f3fa505dd1d563b58beff6 100644 |
| --- a/src/jsregexp.cc |
| +++ b/src/jsregexp.cc |
| @@ -260,7 +260,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
| } else { |
| FlattenString(pattern); |
| ZoneScope zone_scope(DELETE_ON_EXIT); |
| - RegExpParseResult parse_result; |
| + RegExpCompileData parse_result; |
| FlatStringReader reader(pattern); |
| if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| // Throw an exception if we fail to parse the pattern. |
| @@ -706,20 +706,19 @@ static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, |
| pattern->Flatten(shape); |
| } |
| - RegExpParseResult parse_result; |
| + RegExpCompileData compile_data; |
| FlatStringReader reader(pattern); |
| - if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| + if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { |
| // Throw an exception if we fail to parse the pattern. |
| // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. |
| ThrowRegExpException(re, |
| pattern, |
| - parse_result.error, |
| + compile_data.error, |
| "malformed_regexp"); |
| return Handle<FixedArray>::null(); |
| } |
| Handle<FixedArray> compiled_entry = |
| - RegExpEngine::Compile(&parse_result, |
| - NULL, |
| + RegExpEngine::Compile(&compile_data, |
| flags.is_ignore_case(), |
| flags.is_multiline(), |
| pattern, |
| @@ -2603,9 +2602,7 @@ RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
| RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| RegExpNode* on_success) { |
| - ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
| - elms->Add(TextElement::CharClass(this)); |
| - return new TextNode(elms, on_success); |
| + return new TextNode(this, on_success); |
| } |
| @@ -3265,7 +3262,7 @@ OutSet* DispatchTable::Get(uc16 value) { |
| // Analysis |
| -void Analysis::EnsureAnalyzed(RegExpNode* that) { |
| +void AssertionPropagation::EnsureAnalyzed(RegExpNode* that) { |
| if (that->info()->been_analyzed || that->info()->being_analyzed) |
| return; |
| that->info()->being_analyzed = true; |
| @@ -3275,7 +3272,7 @@ void Analysis::EnsureAnalyzed(RegExpNode* that) { |
| } |
| -void Analysis::VisitEnd(EndNode* that) { |
| +void AssertionPropagation::VisitEnd(EndNode* that) { |
| // nothing to do |
| } |
| @@ -3298,7 +3295,7 @@ void TextNode::CalculateOffsets() { |
| } |
| -void Analysis::VisitText(TextNode* that) { |
| +void AssertionPropagation::VisitText(TextNode* that) { |
| if (ignore_case_) { |
| that->MakeCaseIndependent(); |
| } |
| @@ -3314,7 +3311,7 @@ void Analysis::VisitText(TextNode* that) { |
| } |
| -void Analysis::VisitAction(ActionNode* that) { |
| +void AssertionPropagation::VisitAction(ActionNode* that) { |
| RegExpNode* target = that->on_success(); |
| EnsureAnalyzed(target); |
| // If the next node is interested in what it follows then this node |
| @@ -3323,7 +3320,7 @@ void Analysis::VisitAction(ActionNode* that) { |
| } |
| -void Analysis::VisitChoice(ChoiceNode* that) { |
| +void AssertionPropagation::VisitChoice(ChoiceNode* that) { |
| NodeInfo* info = that->info(); |
| for (int i = 0; i < that->alternatives()->length(); i++) { |
| RegExpNode* node = that->alternatives()->at(i).node(); |
| @@ -3335,7 +3332,7 @@ void Analysis::VisitChoice(ChoiceNode* that) { |
| } |
| -void Analysis::VisitBackReference(BackReferenceNode* that) { |
| +void AssertionPropagation::VisitBackReference(BackReferenceNode* that) { |
| EnsureAnalyzed(that->on_success()); |
| } |
| @@ -3650,15 +3647,109 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| } |
| -Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| - RegExpNode** node_return, |
| +#ifdef DEBUG |
| + |
| + |
| +class VisitNodeScope { |
| + public: |
| + explicit VisitNodeScope(RegExpNode* node) : node_(node) { |
| + ASSERT(!node->info()->visited); |
| + node->info()->visited = true; |
| + } |
| + ~VisitNodeScope() { |
| + node_->info()->visited = false; |
| + } |
| + private: |
| + RegExpNode* node_; |
| +}; |
| + |
| + |
| +class NodeValidator : public NodeVisitor { |
| + public: |
| + explicit NodeValidator(bool has_been_expanded) |
| + : has_been_expanded_(has_been_expanded) { } |
| + void ValidateInfo(NodeInfo* info); |
| +#define DECLARE_VISIT(Type) \ |
| + virtual void Visit##Type(Type##Node* that); |
| +FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| +#undef DECLARE_VISIT |
| + private: |
| + bool has_been_expanded_; |
|
Lasse Reichstein
2008/12/11 08:34:23
Isn't this really two different visitors, rolled i
Christian Plesner Hansen
2008/12/11 09:14:12
Good point.
|
| +}; |
| + |
| + |
| +void NodeValidator::ValidateInfo(NodeInfo* info) { |
| + if (has_been_expanded_) { |
| + ASSERT_EQ(info->determine_newline, info->does_determine_newline); |
| + ASSERT_EQ(info->determine_start, info->does_determine_start); |
| + ASSERT_EQ(info->determine_word, info->does_determine_word); |
| + ASSERT_EQ(info->follows_word_interest, |
| + (info->follows_word != NodeInfo::UNKNOWN)); |
| + if (false) { |
| + // These are still unimplemented. |
| + ASSERT_EQ(info->follows_start_interest, |
| + (info->follows_start != NodeInfo::UNKNOWN)); |
| + ASSERT_EQ(info->follows_newline_interest, |
| + (info->follows_newline != NodeInfo::UNKNOWN)); |
| + } |
| + } else { |
| + ASSERT(info->been_analyzed); |
| + } |
| +} |
| + |
| + |
| +void NodeValidator::VisitAction(ActionNode* that) { |
| + if (that->info()->visited) return; |
| + VisitNodeScope scope(that); |
| + ValidateInfo(that->info()); |
| + that->on_success()->Accept(this); |
| +} |
| + |
| + |
| +void NodeValidator::VisitBackReference(BackReferenceNode* that) { |
| + if (that->info()->visited) return; |
| + VisitNodeScope scope(that); |
| + ValidateInfo(that->info()); |
| + that->on_success()->Accept(this); |
| +} |
| + |
| + |
| +void NodeValidator::VisitChoice(ChoiceNode* that) { |
| + if (that->info()->visited) return; |
| + VisitNodeScope scope(that); |
| + ValidateInfo(that->info()); |
| + ZoneList<GuardedAlternative>* alts = that->alternatives(); |
| + for (int i = 0; i < alts->length(); i++) |
| + alts->at(i).node()->Accept(this); |
| +} |
| + |
| + |
| +void NodeValidator::VisitEnd(EndNode* that) { |
| + if (that->info()->visited) return; |
| + VisitNodeScope scope(that); |
| + ValidateInfo(that->info()); |
| +} |
| + |
| + |
| +void NodeValidator::VisitText(TextNode* that) { |
| + if (that->info()->visited) return; |
| + VisitNodeScope scope(that); |
| + ValidateInfo(that->info()); |
| + that->on_success()->Accept(this); |
| +} |
| + |
| + |
| +#endif |
| + |
| + |
| +Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, |
| bool ignore_case, |
| bool is_multiline, |
| Handle<String> pattern, |
| bool is_ascii) { |
| - RegExpCompiler compiler(input->capture_count, ignore_case, is_ascii); |
| + RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); |
| // Wrap the body of the regexp in capture #0. |
| - RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
| + RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
| 0, |
| &compiler, |
| compiler.accept()); |
| @@ -3673,17 +3764,43 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| new RegExpCharacterClass('*'), |
| &compiler, |
| captured_body); |
| - if (node_return != NULL) *node_return = node; |
| - Analysis analysis(ignore_case); |
| + AssertionPropagation analysis(ignore_case); |
| analysis.EnsureAnalyzed(node); |
| NodeInfo info = *node->info(); |
| + data->has_lookbehind = info.HasLookbehind(); |
| + if (data->has_lookbehind) { |
| + // If this node needs information about the preceding text we let |
| + // it start with a character class that consumes a single character |
| + // and proceeds to wherever is appropriate. This means that if |
| + // has_lookbehind is set the code generator must start one character |
| + // before the start position. |
| + node = new TextNode(new RegExpCharacterClass('*'), node); |
|
Lasse Reichstein
2008/12/11 08:34:23
What happens if the look-behind is at the start of
Christian Plesner Hansen
2008/12/11 09:14:12
Yes, it can use has_lookbehind to determine that.
|
| + analysis.EnsureAnalyzed(node); |
| + } |
| + |
| +#ifdef DEBUG |
| + NodeValidator pre_expansion_validator(false); |
| + node->Accept(&pre_expansion_validator); |
| +#endif |
| + |
| node = node->EnsureExpanded(&info); |
| +#ifdef DEBUG |
| + NodeValidator post_expansion_validator(true); |
| + node->Accept(&post_expansion_validator); |
| +#endif |
| + |
| + data->node = node; |
| + |
| if (is_multiline && !FLAG_attempt_multiline_irregexp) { |
| return Handle<FixedArray>::null(); |
| } |
| + if (data->has_lookbehind) { |
| + return Handle<FixedArray>::null(); |
| + } |
| + |
| if (FLAG_irregexp_native) { |
| #ifdef ARM |
| // Unimplemented, fall-through to bytecode implementation. |
| @@ -3695,10 +3812,10 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| mode = RegExpMacroAssemblerIA32::UC16; |
| } |
| RegExpMacroAssemblerIA32 macro_assembler(mode, |
| - (input->capture_count + 1) * 2); |
| + (data->capture_count + 1) * 2); |
| return compiler.Assemble(¯o_assembler, |
| node, |
| - input->capture_count, |
| + data->capture_count, |
| pattern); |
| #endif |
| } |
| @@ -3706,7 +3823,7 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| return compiler.Assemble(¯o_assembler, |
| node, |
| - input->capture_count, |
| + data->capture_count, |
| pattern); |
| } |