Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(59)

Side by Side Diff: src/jsregexp.cc

Issue 13343: More assertion propagation. (Closed)
Patch Set: "Does it lint?" Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 bool in_cache = !cached.is_null(); 253 bool in_cache = !cached.is_null();
254 LOG(RegExpCompileEvent(re, in_cache)); 254 LOG(RegExpCompileEvent(re, in_cache));
255 255
256 Handle<Object> result; 256 Handle<Object> result;
257 if (in_cache) { 257 if (in_cache) {
258 re->set_data(*cached); 258 re->set_data(*cached);
259 result = re; 259 result = re;
260 } else { 260 } else {
261 FlattenString(pattern); 261 FlattenString(pattern);
262 ZoneScope zone_scope(DELETE_ON_EXIT); 262 ZoneScope zone_scope(DELETE_ON_EXIT);
263 RegExpParseResult parse_result; 263 RegExpCompileData parse_result;
264 FlatStringReader reader(pattern); 264 FlatStringReader reader(pattern);
265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
266 // Throw an exception if we fail to parse the pattern. 266 // Throw an exception if we fail to parse the pattern.
267 ThrowRegExpException(re, 267 ThrowRegExpException(re,
268 pattern, 268 pattern,
269 parse_result.error, 269 parse_result.error,
270 "malformed_regexp"); 270 "malformed_regexp");
271 return Handle<Object>::null(); 271 return Handle<Object>::null();
272 } 272 }
273 RegExpAtom* atom = parse_result.tree->AsAtom(); 273 RegExpAtom* atom = parse_result.tree->AsAtom();
(...skipping 425 matching lines...) Expand 10 before | Expand all | Expand 10 after
699 ZoneScope zone_scope(DELETE_ON_EXIT); 699 ZoneScope zone_scope(DELETE_ON_EXIT);
700 700
701 JSRegExp::Flags flags = re->GetFlags(); 701 JSRegExp::Flags flags = re->GetFlags();
702 702
703 Handle<String> pattern(re->Pattern()); 703 Handle<String> pattern(re->Pattern());
704 StringShape shape(*pattern); 704 StringShape shape(*pattern);
705 if (!pattern->IsFlat(shape)) { 705 if (!pattern->IsFlat(shape)) {
706 pattern->Flatten(shape); 706 pattern->Flatten(shape);
707 } 707 }
708 708
709 RegExpParseResult parse_result; 709 RegExpCompileData compile_data;
710 FlatStringReader reader(pattern); 710 FlatStringReader reader(pattern);
711 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 711 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
712 // Throw an exception if we fail to parse the pattern. 712 // Throw an exception if we fail to parse the pattern.
713 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. 713 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
714 ThrowRegExpException(re, 714 ThrowRegExpException(re,
715 pattern, 715 pattern,
716 parse_result.error, 716 compile_data.error,
717 "malformed_regexp"); 717 "malformed_regexp");
718 return Handle<FixedArray>::null(); 718 return Handle<FixedArray>::null();
719 } 719 }
720 Handle<FixedArray> compiled_entry = 720 Handle<FixedArray> compiled_entry =
721 RegExpEngine::Compile(&parse_result, 721 RegExpEngine::Compile(&compile_data,
722 NULL,
723 flags.is_ignore_case(), 722 flags.is_ignore_case(),
724 flags.is_multiline(), 723 flags.is_multiline(),
725 pattern, 724 pattern,
726 is_ascii); 725 is_ascii);
727 if (!compiled_entry.is_null()) { 726 if (!compiled_entry.is_null()) {
728 alternatives->set(index, *compiled_entry); 727 alternatives->set(index, *compiled_entry);
729 } 728 }
730 return compiled_entry; 729 return compiled_entry;
731 } 730 }
732 731
(...skipping 1863 matching lines...) Expand 10 before | Expand all | Expand 10 after
2596 2595
2597 2596
2598 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 2597 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
2599 RegExpNode* on_success) { 2598 RegExpNode* on_success) {
2600 return new TextNode(elements(), on_success); 2599 return new TextNode(elements(), on_success);
2601 } 2600 }
2602 2601
2603 2602
2604 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 2603 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
2605 RegExpNode* on_success) { 2604 RegExpNode* on_success) {
2606 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); 2605 return new TextNode(this, on_success);
2607 elms->Add(TextElement::CharClass(this));
2608 return new TextNode(elms, on_success);
2609 } 2606 }
2610 2607
2611 2608
2612 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 2609 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
2613 RegExpNode* on_success) { 2610 RegExpNode* on_success) {
2614 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 2611 ZoneList<RegExpTree*>* alternatives = this->alternatives();
2615 int length = alternatives->length(); 2612 int length = alternatives->length();
2616 ChoiceNode* result = new ChoiceNode(length); 2613 ChoiceNode* result = new ChoiceNode(length);
2617 for (int i = 0; i < length; i++) { 2614 for (int i = 0; i < length; i++) {
2618 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, 2615 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
(...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after
3258 return entry->out_set(); 3255 return entry->out_set();
3259 else 3256 else
3260 return empty(); 3257 return empty();
3261 } 3258 }
3262 3259
3263 3260
3264 // ------------------------------------------------------------------- 3261 // -------------------------------------------------------------------
3265 // Analysis 3262 // Analysis
3266 3263
3267 3264
3268 void Analysis::EnsureAnalyzed(RegExpNode* that) { 3265 void AssertionPropagation::EnsureAnalyzed(RegExpNode* that) {
3269 if (that->info()->been_analyzed || that->info()->being_analyzed) 3266 if (that->info()->been_analyzed || that->info()->being_analyzed)
3270 return; 3267 return;
3271 that->info()->being_analyzed = true; 3268 that->info()->being_analyzed = true;
3272 that->Accept(this); 3269 that->Accept(this);
3273 that->info()->being_analyzed = false; 3270 that->info()->being_analyzed = false;
3274 that->info()->been_analyzed = true; 3271 that->info()->been_analyzed = true;
3275 } 3272 }
3276 3273
3277 3274
3278 void Analysis::VisitEnd(EndNode* that) { 3275 void AssertionPropagation::VisitEnd(EndNode* that) {
3279 // nothing to do 3276 // nothing to do
3280 } 3277 }
3281 3278
3282 3279
3283 void TextNode::CalculateOffsets() { 3280 void TextNode::CalculateOffsets() {
3284 int element_count = elements()->length(); 3281 int element_count = elements()->length();
3285 // Set up the offsets of the elements relative to the start. This is a fixed 3282 // Set up the offsets of the elements relative to the start. This is a fixed
3286 // quantity since a TextNode can only contain fixed-width things. 3283 // quantity since a TextNode can only contain fixed-width things.
3287 int cp_offset = 0; 3284 int cp_offset = 0;
3288 for (int i = 0; i < element_count; i++) { 3285 for (int i = 0; i < element_count; i++) {
3289 TextElement& elm = elements()->at(i); 3286 TextElement& elm = elements()->at(i);
3290 elm.cp_offset = cp_offset; 3287 elm.cp_offset = cp_offset;
3291 if (elm.type == TextElement::ATOM) { 3288 if (elm.type == TextElement::ATOM) {
3292 cp_offset += elm.data.u_atom->data().length(); 3289 cp_offset += elm.data.u_atom->data().length();
3293 } else { 3290 } else {
3294 cp_offset++; 3291 cp_offset++;
3295 Vector<const uc16> quarks = elm.data.u_atom->data(); 3292 Vector<const uc16> quarks = elm.data.u_atom->data();
3296 } 3293 }
3297 } 3294 }
3298 } 3295 }
3299 3296
3300 3297
3301 void Analysis::VisitText(TextNode* that) { 3298 void AssertionPropagation::VisitText(TextNode* that) {
3302 if (ignore_case_) { 3299 if (ignore_case_) {
3303 that->MakeCaseIndependent(); 3300 that->MakeCaseIndependent();
3304 } 3301 }
3305 EnsureAnalyzed(that->on_success()); 3302 EnsureAnalyzed(that->on_success());
3306 NodeInfo* info = that->info(); 3303 NodeInfo* info = that->info();
3307 NodeInfo* next_info = that->on_success()->info(); 3304 NodeInfo* next_info = that->on_success()->info();
3308 // If the following node is interested in what it follows then this 3305 // If the following node is interested in what it follows then this
3309 // node must determine it. 3306 // node must determine it.
3310 info->determine_newline = next_info->follows_newline_interest; 3307 info->determine_newline = next_info->follows_newline_interest;
3311 info->determine_word = next_info->follows_word_interest; 3308 info->determine_word = next_info->follows_word_interest;
3312 info->determine_start = next_info->follows_start_interest; 3309 info->determine_start = next_info->follows_start_interest;
3313 that->CalculateOffsets(); 3310 that->CalculateOffsets();
3314 } 3311 }
3315 3312
3316 3313
3317 void Analysis::VisitAction(ActionNode* that) { 3314 void AssertionPropagation::VisitAction(ActionNode* that) {
3318 RegExpNode* target = that->on_success(); 3315 RegExpNode* target = that->on_success();
3319 EnsureAnalyzed(target); 3316 EnsureAnalyzed(target);
3320 // If the next node is interested in what it follows then this node 3317 // If the next node is interested in what it follows then this node
3321 // has to be interested too so it can pass the information on. 3318 // has to be interested too so it can pass the information on.
3322 that->info()->AddFromFollowing(target->info()); 3319 that->info()->AddFromFollowing(target->info());
3323 } 3320 }
3324 3321
3325 3322
3326 void Analysis::VisitChoice(ChoiceNode* that) { 3323 void AssertionPropagation::VisitChoice(ChoiceNode* that) {
3327 NodeInfo* info = that->info(); 3324 NodeInfo* info = that->info();
3328 for (int i = 0; i < that->alternatives()->length(); i++) { 3325 for (int i = 0; i < that->alternatives()->length(); i++) {
3329 RegExpNode* node = that->alternatives()->at(i).node(); 3326 RegExpNode* node = that->alternatives()->at(i).node();
3330 EnsureAnalyzed(node); 3327 EnsureAnalyzed(node);
3331 // Anything the following nodes need to know has to be known by 3328 // Anything the following nodes need to know has to be known by
3332 // this node also, so it can pass it on. 3329 // this node also, so it can pass it on.
3333 info->AddFromFollowing(node->info()); 3330 info->AddFromFollowing(node->info());
3334 } 3331 }
3335 } 3332 }
3336 3333
3337 3334
3338 void Analysis::VisitBackReference(BackReferenceNode* that) { 3335 void AssertionPropagation::VisitBackReference(BackReferenceNode* that) {
3339 EnsureAnalyzed(that->on_success()); 3336 EnsureAnalyzed(that->on_success());
3340 } 3337 }
3341 3338
3342 3339
3343 // ------------------------------------------------------------------- 3340 // -------------------------------------------------------------------
3344 // Assumption expansion 3341 // Assumption expansion
3345 3342
3346 3343
3347 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { 3344 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
3348 siblings_.Ensure(this); 3345 siblings_.Ensure(this);
(...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after
3643 } 3640 }
3644 } 3641 }
3645 3642
3646 3643
3647 void DispatchTableConstructor::VisitAction(ActionNode* that) { 3644 void DispatchTableConstructor::VisitAction(ActionNode* that) {
3648 RegExpNode* target = that->on_success(); 3645 RegExpNode* target = that->on_success();
3649 target->Accept(this); 3646 target->Accept(this);
3650 } 3647 }
3651 3648
3652 3649
3653 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, 3650 #ifdef DEBUG
3654 RegExpNode** node_return, 3651
3652
3653 class VisitNodeScope {
3654 public:
3655 explicit VisitNodeScope(RegExpNode* node) : node_(node) {
3656 ASSERT(!node->info()->visited);
3657 node->info()->visited = true;
3658 }
3659 ~VisitNodeScope() {
3660 node_->info()->visited = false;
3661 }
3662 private:
3663 RegExpNode* node_;
3664 };
3665
3666
3667 class NodeValidator : public NodeVisitor {
3668 public:
3669 explicit NodeValidator(bool has_been_expanded)
3670 : has_been_expanded_(has_been_expanded) { }
3671 void ValidateInfo(NodeInfo* info);
3672 #define DECLARE_VISIT(Type) \
3673 virtual void Visit##Type(Type##Node* that);
3674 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
3675 #undef DECLARE_VISIT
3676 private:
3677 bool has_been_expanded_;
Lasse Reichstein 2008/12/11 08:34:23 Isn't this really two different visitors, rolled i
Christian Plesner Hansen 2008/12/11 09:14:12 Good point.
3678 };
3679
3680
3681 void NodeValidator::ValidateInfo(NodeInfo* info) {
3682 if (has_been_expanded_) {
3683 ASSERT_EQ(info->determine_newline, info->does_determine_newline);
3684 ASSERT_EQ(info->determine_start, info->does_determine_start);
3685 ASSERT_EQ(info->determine_word, info->does_determine_word);
3686 ASSERT_EQ(info->follows_word_interest,
3687 (info->follows_word != NodeInfo::UNKNOWN));
3688 if (false) {
3689 // These are still unimplemented.
3690 ASSERT_EQ(info->follows_start_interest,
3691 (info->follows_start != NodeInfo::UNKNOWN));
3692 ASSERT_EQ(info->follows_newline_interest,
3693 (info->follows_newline != NodeInfo::UNKNOWN));
3694 }
3695 } else {
3696 ASSERT(info->been_analyzed);
3697 }
3698 }
3699
3700
3701 void NodeValidator::VisitAction(ActionNode* that) {
3702 if (that->info()->visited) return;
3703 VisitNodeScope scope(that);
3704 ValidateInfo(that->info());
3705 that->on_success()->Accept(this);
3706 }
3707
3708
3709 void NodeValidator::VisitBackReference(BackReferenceNode* that) {
3710 if (that->info()->visited) return;
3711 VisitNodeScope scope(that);
3712 ValidateInfo(that->info());
3713 that->on_success()->Accept(this);
3714 }
3715
3716
3717 void NodeValidator::VisitChoice(ChoiceNode* that) {
3718 if (that->info()->visited) return;
3719 VisitNodeScope scope(that);
3720 ValidateInfo(that->info());
3721 ZoneList<GuardedAlternative>* alts = that->alternatives();
3722 for (int i = 0; i < alts->length(); i++)
3723 alts->at(i).node()->Accept(this);
3724 }
3725
3726
3727 void NodeValidator::VisitEnd(EndNode* that) {
3728 if (that->info()->visited) return;
3729 VisitNodeScope scope(that);
3730 ValidateInfo(that->info());
3731 }
3732
3733
3734 void NodeValidator::VisitText(TextNode* that) {
3735 if (that->info()->visited) return;
3736 VisitNodeScope scope(that);
3737 ValidateInfo(that->info());
3738 that->on_success()->Accept(this);
3739 }
3740
3741
3742 #endif
3743
3744
3745 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
3655 bool ignore_case, 3746 bool ignore_case,
3656 bool is_multiline, 3747 bool is_multiline,
3657 Handle<String> pattern, 3748 Handle<String> pattern,
3658 bool is_ascii) { 3749 bool is_ascii) {
3659 RegExpCompiler compiler(input->capture_count, ignore_case, is_ascii); 3750 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
3660 // Wrap the body of the regexp in capture #0. 3751 // Wrap the body of the regexp in capture #0.
3661 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, 3752 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
3662 0, 3753 0,
3663 &compiler, 3754 &compiler,
3664 compiler.accept()); 3755 compiler.accept());
3665 // Add a .*? at the beginning, outside the body capture. 3756 // Add a .*? at the beginning, outside the body capture.
3666 // Note: We could choose to not add this if the regexp is anchored at 3757 // Note: We could choose to not add this if the regexp is anchored at
3667 // the start of the input but I'm not sure how best to do that and 3758 // the start of the input but I'm not sure how best to do that and
3668 // since we don't even handle ^ yet I'm saving that optimization for 3759 // since we don't even handle ^ yet I'm saving that optimization for
3669 // later. 3760 // later.
3670 RegExpNode* node = RegExpQuantifier::ToNode(0, 3761 RegExpNode* node = RegExpQuantifier::ToNode(0,
3671 RegExpQuantifier::kInfinity, 3762 RegExpQuantifier::kInfinity,
3672 false, 3763 false,
3673 new RegExpCharacterClass('*'), 3764 new RegExpCharacterClass('*'),
3674 &compiler, 3765 &compiler,
3675 captured_body); 3766 captured_body);
3676 if (node_return != NULL) *node_return = node; 3767 AssertionPropagation analysis(ignore_case);
3677 Analysis analysis(ignore_case);
3678 analysis.EnsureAnalyzed(node); 3768 analysis.EnsureAnalyzed(node);
3679 3769
3680 NodeInfo info = *node->info(); 3770 NodeInfo info = *node->info();
3771 data->has_lookbehind = info.HasLookbehind();
3772 if (data->has_lookbehind) {
3773 // If this node needs information about the preceding text we let
3774 // it start with a character class that consumes a single character
3775 // and proceeds to wherever is appropriate. This means that if
3776 // has_lookbehind is set the code generator must start one character
3777 // before the start position.
3778 node = new TextNode(new RegExpCharacterClass('*'), node);
Lasse Reichstein 2008/12/11 08:34:23 What happens if the look-behind is at the start of
Christian Plesner Hansen 2008/12/11 09:14:12 Yes, it can use has_lookbehind to determine that.
3779 analysis.EnsureAnalyzed(node);
3780 }
3781
3782 #ifdef DEBUG
3783 NodeValidator pre_expansion_validator(false);
3784 node->Accept(&pre_expansion_validator);
3785 #endif
3786
3681 node = node->EnsureExpanded(&info); 3787 node = node->EnsureExpanded(&info);
3682 3788
3789 #ifdef DEBUG
3790 NodeValidator post_expansion_validator(true);
3791 node->Accept(&post_expansion_validator);
3792 #endif
3793
3794 data->node = node;
3795
3683 if (is_multiline && !FLAG_attempt_multiline_irregexp) { 3796 if (is_multiline && !FLAG_attempt_multiline_irregexp) {
3684 return Handle<FixedArray>::null(); 3797 return Handle<FixedArray>::null();
3685 } 3798 }
3686 3799
3800 if (data->has_lookbehind) {
3801 return Handle<FixedArray>::null();
3802 }
3803
3687 if (FLAG_irregexp_native) { 3804 if (FLAG_irregexp_native) {
3688 #ifdef ARM 3805 #ifdef ARM
3689 // Unimplemented, fall-through to bytecode implementation. 3806 // Unimplemented, fall-through to bytecode implementation.
3690 #else // IA32 3807 #else // IA32
3691 RegExpMacroAssemblerIA32::Mode mode; 3808 RegExpMacroAssemblerIA32::Mode mode;
3692 if (is_ascii) { 3809 if (is_ascii) {
3693 mode = RegExpMacroAssemblerIA32::ASCII; 3810 mode = RegExpMacroAssemblerIA32::ASCII;
3694 } else { 3811 } else {
3695 mode = RegExpMacroAssemblerIA32::UC16; 3812 mode = RegExpMacroAssemblerIA32::UC16;
3696 } 3813 }
3697 RegExpMacroAssemblerIA32 macro_assembler(mode, 3814 RegExpMacroAssemblerIA32 macro_assembler(mode,
3698 (input->capture_count + 1) * 2); 3815 (data->capture_count + 1) * 2);
3699 return compiler.Assemble(&macro_assembler, 3816 return compiler.Assemble(&macro_assembler,
3700 node, 3817 node,
3701 input->capture_count, 3818 data->capture_count,
3702 pattern); 3819 pattern);
3703 #endif 3820 #endif
3704 } 3821 }
3705 EmbeddedVector<byte, 1024> codes; 3822 EmbeddedVector<byte, 1024> codes;
3706 RegExpMacroAssemblerIrregexp macro_assembler(codes); 3823 RegExpMacroAssemblerIrregexp macro_assembler(codes);
3707 return compiler.Assemble(&macro_assembler, 3824 return compiler.Assemble(&macro_assembler,
3708 node, 3825 node,
3709 input->capture_count, 3826 data->capture_count,
3710 pattern); 3827 pattern);
3711 } 3828 }
3712 3829
3713 3830
3714 }} // namespace v8::internal 3831 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/parser.h » ('j') | test/cctest/test-regexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698