Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(466)

Side by Side Diff: src/jsregexp.cc

Issue 10254: Rudimentary analysis (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 629 matching lines...) Expand 10 before | Expand all | Expand 10 after
640 void PrintNode(const char* label, RegExpNode* node); 640 void PrintNode(const char* label, RegExpNode* node);
641 void Visit(RegExpNode* node); 641 void Visit(RegExpNode* node);
642 StringStream* stream() { return &stream_; } 642 StringStream* stream() { return &stream_; }
643 private: 643 private:
644 HeapStringAllocator alloc_; 644 HeapStringAllocator alloc_;
645 StringStream stream_; 645 StringStream stream_;
646 std::set<RegExpNode*> seen_; 646 std::set<RegExpNode*> seen_;
647 }; 647 };
648 648
649 649
650 // Data that is passed around during analysis.
651 class AnalysisData {
652 public:
653 AnalysisData(RegExpCompiler* compiler)
654 : compiler_(compiler),
655 table_(NULL),
656 choice_index_(-1) { }
657 DispatchTable* table() { return table_; }
658 RegExpCompiler* compiler() { return compiler_; }
659 int choice_index() { return choice_index_; }
660 protected:
661 AnalysisData(AnalysisData* prev) { *this = *prev; }
662 RegExpCompiler* compiler_;
663 DispatchTable *table_;
664 int choice_index_;
665 };
666
667
668 // A subclass of analysis data that allows fields to be set. Anyone
669 // who needs to create new analysis data creates an instance of this
670 // class, configures it, and then passes it on as an AnalysisData
671 // which doesn't allow its fields to be changed.
672 class SubAnalysisData: public AnalysisData {
Lasse Reichstein 2008/11/10 16:35:45 How about AnalysisDataBuilder? SubAnalysisData do
Christian Plesner Hansen 2008/11/11 06:12:35 The 'Sub' was meant to convey that the result is a
673 public:
674 SubAnalysisData(AnalysisData* prev) : AnalysisData(prev) { }
675 void set_table(DispatchTable* value) { table_ = value; }
676 void set_choice_index(int value) { choice_index_ = value; }
677 };
678
679
650 class RegExpCompiler { 680 class RegExpCompiler {
651 public: 681 public:
652 explicit RegExpCompiler(int capture_count) 682 explicit RegExpCompiler(int capture_count)
653 : next_register_(2 * capture_count) { } 683 : next_register_(2 * capture_count) { }
654 684
655 RegExpNode* Compile(RegExpTree* tree, 685 RegExpNode* Compile(RegExpTree* tree,
656 RegExpNode* on_success, 686 RegExpNode* on_success,
657 RegExpNode* on_failure) { 687 RegExpNode* on_failure);
658 return tree->ToNode(this, on_success, on_failure);
659 }
660 688
661 int AllocateRegister() { return next_register_++; } 689 int AllocateRegister() { return next_register_++; }
662 690
663 private: 691 private:
664 int next_register_; 692 int next_register_;
665 }; 693 };
666 694
667 695
668 class RegExpNode: public ZoneObject { 696 class RegExpNode: public ZoneObject {
669 public: 697 public:
670 virtual ~RegExpNode() { } 698 virtual ~RegExpNode() { }
671 virtual void EmitDot(DotPrinter* out); 699 virtual void EmitDot(DotPrinter* out);
700 virtual void Analyze(AnalysisData* data) = 0;
672 }; 701 };
673 702
674 703
675 class SeqRegExpNode: public RegExpNode { 704 class SeqRegExpNode: public RegExpNode {
676 public: 705 public:
677 explicit SeqRegExpNode(RegExpNode* on_success) 706 explicit SeqRegExpNode(RegExpNode* on_success)
678 : on_success_(on_success) { } 707 : on_success_(on_success) { }
679 RegExpNode* on_success() { return on_success_; } 708 RegExpNode* on_success() { return on_success_; }
680 private: 709 private:
681 RegExpNode* on_success_; 710 RegExpNode* on_success_;
682 }; 711 };
683 712
684 713
685 class EndNode: public RegExpNode { 714 class EndNode: public RegExpNode {
686 public: 715 public:
687 enum Action { ACCEPT, BACKTRACK }; 716 enum Action { ACCEPT, BACKTRACK };
688 virtual void EmitDot(DotPrinter* out); 717 virtual void EmitDot(DotPrinter* out);
718 virtual void Analyze(AnalysisData* data);
689 static EndNode* GetAccept() { return &kAccept; } 719 static EndNode* GetAccept() { return &kAccept; }
690 static EndNode* GetBacktrack() { return &kBacktrack; } 720 static EndNode* GetBacktrack() { return &kBacktrack; }
691 private: 721 private:
692 explicit EndNode(Action action) : action_(action) { } 722 explicit EndNode(Action action) : action_(action) { }
693 Action action_; 723 Action action_;
694 static EndNode kAccept; 724 static EndNode kAccept;
695 static EndNode kBacktrack; 725 static EndNode kBacktrack;
696 }; 726 };
697 727
698 728
699 EndNode EndNode::kAccept(ACCEPT); 729 EndNode EndNode::kAccept(ACCEPT);
700 EndNode EndNode::kBacktrack(BACKTRACK); 730 EndNode EndNode::kBacktrack(BACKTRACK);
701 731
702 732
703 class AtomNode: public SeqRegExpNode { 733 class AtomNode: public SeqRegExpNode {
704 public: 734 public:
705 AtomNode(Vector<const uc16> data, 735 AtomNode(Vector<const uc16> data,
706 RegExpNode* on_success, 736 RegExpNode* on_success,
707 RegExpNode* on_failure) 737 RegExpNode* on_failure)
708 : SeqRegExpNode(on_success), 738 : SeqRegExpNode(on_success),
709 on_failure_(on_failure), 739 on_failure_(on_failure),
710 data_(data) { } 740 data_(data) { }
711 virtual void EmitDot(DotPrinter* out); 741 virtual void EmitDot(DotPrinter* out);
742 virtual void Analyze(AnalysisData* data);
712 Vector<const uc16> data() { return data_; } 743 Vector<const uc16> data() { return data_; }
713 RegExpNode* on_failure() { return on_failure_; } 744 RegExpNode* on_failure() { return on_failure_; }
714 private: 745 private:
715 RegExpNode* on_failure_; 746 RegExpNode* on_failure_;
716 Vector<const uc16> data_; 747 Vector<const uc16> data_;
717 }; 748 };
718 749
719 750
720 class BackreferenceNode: public SeqRegExpNode { 751 class BackreferenceNode: public SeqRegExpNode {
721 public: 752 public:
722 BackreferenceNode(int start_reg, 753 BackreferenceNode(int start_reg,
723 int end_reg, 754 int end_reg,
724 RegExpNode* on_success, 755 RegExpNode* on_success,
725 RegExpNode* on_failure) 756 RegExpNode* on_failure)
726 : SeqRegExpNode(on_success), 757 : SeqRegExpNode(on_success),
727 on_failure_(on_failure), 758 on_failure_(on_failure),
728 start_reg_(start_reg), 759 start_reg_(start_reg),
729 end_reg_(end_reg) { } 760 end_reg_(end_reg) { }
730 virtual void EmitDot(DotPrinter* out); 761 virtual void EmitDot(DotPrinter* out);
762 virtual void Analyze(AnalysisData* data);
731 RegExpNode* on_failure() { return on_failure_; } 763 RegExpNode* on_failure() { return on_failure_; }
732 int start_register() { return start_reg_; } 764 int start_register() { return start_reg_; }
733 int end_register() { return end_reg_; } 765 int end_register() { return end_reg_; }
734 private: 766 private:
735 RegExpNode* on_failure_; 767 RegExpNode* on_failure_;
736 int start_reg_; 768 int start_reg_;
737 int end_reg_; 769 int end_reg_;
738 }; 770 };
739 771
740 772
741 class CharacterClassNode: public SeqRegExpNode { 773 class CharacterClassNode: public SeqRegExpNode {
742 public: 774 public:
743 CharacterClassNode(ZoneList<CharacterRange>* ranges, 775 CharacterClassNode(ZoneList<CharacterRange>* ranges,
744 RegExpNode* on_success, 776 RegExpNode* on_success,
745 RegExpNode* on_failure) 777 RegExpNode* on_failure)
746 : SeqRegExpNode(on_success), 778 : SeqRegExpNode(on_success),
747 on_failure_(on_failure), 779 on_failure_(on_failure),
748 ranges_(ranges) { } 780 ranges_(ranges) { }
749 virtual void EmitDot(DotPrinter* out); 781 virtual void EmitDot(DotPrinter* out);
782 virtual void Analyze(AnalysisData* data);
750 ZoneList<CharacterRange>* ranges() { return ranges_; } 783 ZoneList<CharacterRange>* ranges() { return ranges_; }
751 RegExpNode* on_failure() { return on_failure_; } 784 RegExpNode* on_failure() { return on_failure_; }
752 private: 785 private:
753 RegExpNode* on_failure_; 786 RegExpNode* on_failure_;
754 ZoneList<CharacterRange>* ranges_; 787 ZoneList<CharacterRange>* ranges_;
755 }; 788 };
756 789
757 790
758 class Guard: public ZoneObject { 791 class Guard: public ZoneObject {
759 public: 792 public:
(...skipping 28 matching lines...) Expand all
788 if (guards_ == NULL) 821 if (guards_ == NULL)
789 guards_ = new ZoneList<Guard*>(1); 822 guards_ = new ZoneList<Guard*>(1);
790 guards_->Add(guard); 823 guards_->Add(guard);
791 } 824 }
792 825
793 826
794 class ChoiceNode: public RegExpNode { 827 class ChoiceNode: public RegExpNode {
795 public: 828 public:
796 explicit ChoiceNode(int expected_size, RegExpNode* on_failure) 829 explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
797 : on_failure_(on_failure), 830 : on_failure_(on_failure),
798 choices_(new ZoneList<GuardedAlternative>(expected_size)) { } 831 choices_(new ZoneList<GuardedAlternative>(expected_size)),
832 visited_(false) { }
Lasse Reichstein 2008/11/10 16:35:45 Why is it only ChoiceNode that has a visited field
Christian Plesner Hansen 2008/11/11 06:12:35 Yes, but it does no harm if you don't catch it unt
799 virtual void EmitDot(DotPrinter* out); 833 virtual void EmitDot(DotPrinter* out);
834 virtual void Analyze(AnalysisData* data);
800 void AddChild(GuardedAlternative node) { choices()->Add(node); } 835 void AddChild(GuardedAlternative node) { choices()->Add(node); }
801 ZoneList<GuardedAlternative>* choices() { return choices_; } 836 ZoneList<GuardedAlternative>* choices() { return choices_; }
802 DispatchTable* table() { return &table_; } 837 DispatchTable* table() { return &table_; }
803 RegExpNode* on_failure() { return on_failure_; } 838 RegExpNode* on_failure() { return on_failure_; }
804 private: 839 private:
805 RegExpNode* on_failure_; 840 RegExpNode* on_failure_;
806 ZoneList<GuardedAlternative>* choices_; 841 ZoneList<GuardedAlternative>* choices_;
807 DispatchTable table_; 842 DispatchTable table_;
843 bool visited_;
808 }; 844 };
809 845
810 846
811 class ActionNode: public SeqRegExpNode { 847 class ActionNode: public SeqRegExpNode {
812 public: 848 public:
813 enum Type { 849 enum Type {
814 STORE_REGISTER, 850 STORE_REGISTER,
815 INCREMENT_REGISTER, 851 INCREMENT_REGISTER,
816 STORE_POSITION, 852 STORE_POSITION,
817 RESTORE_POSITION, 853 RESTORE_POSITION,
(...skipping 25 matching lines...) Expand all
843 static ActionNode* BeginSubmatch(RegExpNode* on_success) { 879 static ActionNode* BeginSubmatch(RegExpNode* on_success) {
844 return new ActionNode(BEGIN_SUBMATCH, on_success); 880 return new ActionNode(BEGIN_SUBMATCH, on_success);
845 } 881 }
846 static ActionNode* EscapeSubmatch(RegExpNode* on_success) { 882 static ActionNode* EscapeSubmatch(RegExpNode* on_success) {
847 return new ActionNode(ESCAPE_SUBMATCH, on_success); 883 return new ActionNode(ESCAPE_SUBMATCH, on_success);
848 } 884 }
849 static ActionNode* EndSubmatch(RegExpNode* on_success) { 885 static ActionNode* EndSubmatch(RegExpNode* on_success) {
850 return new ActionNode(END_SUBMATCH, on_success); 886 return new ActionNode(END_SUBMATCH, on_success);
851 } 887 }
852 virtual void EmitDot(DotPrinter* out); 888 virtual void EmitDot(DotPrinter* out);
889 virtual void Analyze(AnalysisData* data);
853 Type type() { return type_; } 890 Type type() { return type_; }
854 private: 891 private:
855 ActionNode(Type type, RegExpNode* on_success) 892 ActionNode(Type type, RegExpNode* on_success)
856 : SeqRegExpNode(on_success), 893 : SeqRegExpNode(on_success),
857 type_(type) { } 894 type_(type) { }
858 Type type_; 895 Type type_;
859 union { 896 union {
860 struct { 897 struct {
861 int reg_; 898 int reg_;
862 int value_; 899 int value_;
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
912 static void PrintOnFailure(DotPrinter* out, 949 static void PrintOnFailure(DotPrinter* out,
913 RegExpNode* from, 950 RegExpNode* from,
914 RegExpNode* on_failure) { 951 RegExpNode* on_failure) {
915 if (on_failure == EndNode::GetBacktrack()) return; 952 if (on_failure == EndNode::GetBacktrack()) return;
916 out->stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); 953 out->stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
917 out->Visit(on_failure); 954 out->Visit(on_failure);
918 } 955 }
919 956
920 957
921 void ChoiceNode::EmitDot(DotPrinter* out) { 958 void ChoiceNode::EmitDot(DotPrinter* out) {
922 out->stream()->Add(" n%p [label=\"?\", shape=circle];\n", this); 959 out->stream()->Add(" n%p [label=\"? (%p)\"];\n", this, this);
923 PrintOnFailure(out, this, this->on_failure()); 960 PrintOnFailure(out, this, this->on_failure());
924 for (int i = 0; i < choices()->length(); i++) { 961 for (int i = 0; i < choices()->length(); i++) {
925 GuardedAlternative alt = choices()->at(i); 962 GuardedAlternative alt = choices()->at(i);
926 out->stream()->Add(" n%p -> n%p [label=\"%i", 963 out->stream()->Add(" n%p -> n%p [label=\"%i",
927 this, 964 this,
928 alt.node(), 965 alt.node(),
929 i); 966 i);
930 if (alt.guards() != NULL) { 967 if (alt.guards() != NULL) {
931 out->stream()->Add(" ["); 968 out->stream()->Add(" [");
932 for (int j = 0; j < alt.guards()->length(); j++) { 969 for (int j = 0; j < alt.guards()->length(); j++) {
933 if (j > 0) out->stream()->Add(" "); 970 if (j > 0) out->stream()->Add(" ");
934 Guard* guard = alt.guards()->at(j); 971 Guard* guard = alt.guards()->at(j);
935 switch (guard->op()) { 972 switch (guard->op()) {
936 case Guard::GEQ: 973 case Guard::GEQ:
937 out->stream()->Add("$%i &#8805; %i", guard->reg(), guard->value()); 974 out->stream()->Add("$%i &#8805; %i", guard->reg(), guard->value());
938 break; 975 break;
939 case Guard::LT: 976 case Guard::LT:
940 out->stream()->Add("$%i < %i", guard->reg(), guard->value()); 977 out->stream()->Add("$%i < %i", guard->reg(), guard->value());
941 break; 978 break;
942 } 979 }
943 } 980 }
944 out->stream()->Add("]"); 981 out->stream()->Add("]");
945 } 982 }
946 out->stream()->Add("\"];\n"); 983 out->stream()->Add("\"];\n");
947 out->Visit(choices()->at(i).node()); 984 out->Visit(choices()->at(i).node());
948 } 985 }
986 fprintf(stderr, "--- %p ---\n", static_cast<void*>(this));
Lasse Reichstein 2008/11/10 16:35:45 Debug-print?
Christian Plesner Hansen 2008/11/11 06:12:35 It turns out I can use OS::PrintError. Also, I'll
987 table()->Dump();
949 } 988 }
950 989
951 990
952 void AtomNode::EmitDot(DotPrinter* out) { 991 void AtomNode::EmitDot(DotPrinter* out) {
953 out->stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n", 992 out->stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n",
954 this, 993 this,
955 data()); 994 data());
956 out->stream()->Add(" n%p -> n%p;\n", this, this->on_success()); 995 out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
957 out->Visit(this->on_success()); 996 out->Visit(this->on_success());
958 PrintOnFailure(out, this, this->on_failure()); 997 PrintOnFailure(out, this, this->on_failure());
(...skipping 426 matching lines...) Expand 10 before | Expand all | Expand 10 after
1385 // There is no overlap so we can just add the range 1424 // There is no overlap so we can just add the range
1386 ZoneSplayTree<Config>::Locator ins; 1425 ZoneSplayTree<Config>::Locator ins;
1387 ASSERT_RESULT(tree()->Insert(current.from(), &ins)); 1426 ASSERT_RESULT(tree()->Insert(current.from(), &ins));
1388 ins.set_value(Entry(current.from(), current.to(), value)); 1427 ins.set_value(Entry(current.from(), current.to(), value));
1389 break; 1428 break;
1390 } 1429 }
1391 } 1430 }
1392 } 1431 }
1393 1432
1394 1433
1434 class DispatchTableDumper {
1435 public:
1436 DispatchTableDumper(StringStream* stream) : stream_(stream) { }
1437 void Call(uc16 key, DispatchTable::Entry entry);
1438 StringStream* stream() { return stream_; }
1439 private:
1440 StringStream* stream_;
1441 };
1442
1443
1444 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
1445 stream()->Add("[%k-%k]: {", key, entry.from());
1446 OutSet set = entry.out_set();
1447 bool first = true;
1448 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
1449 if (set.Get(i)) {
1450 if (first) first = false;
1451 else stream()->Add(", ");
1452 stream()->Add("%i", i);
1453 }
1454 }
1455 stream()->Add("}\n");
1456 }
1457
1458
1459 void DispatchTable::Dump() {
1460 HeapStringAllocator alloc;
1461 StringStream stream(&alloc);
1462 tree()->ForEach(DispatchTableDumper(&stream));
1463 fprintf(stderr, "%s", *stream.ToCString());
1464 }
1465
1466
1395 OutSet DispatchTable::Get(uc16 value) { 1467 OutSet DispatchTable::Get(uc16 value) {
1396 ZoneSplayTree<Config>::Locator loc; 1468 ZoneSplayTree<Config>::Locator loc;
1397 if (!tree()->FindGreatestLessThan(value, &loc)) 1469 if (!tree()->FindGreatestLessThan(value, &loc))
1398 return OutSet::empty(); 1470 return OutSet::empty();
1399 Entry* entry = &loc.value(); 1471 Entry* entry = &loc.value();
1400 if (value <= entry->to()) 1472 if (value <= entry->to())
1401 return entry->out_set(); 1473 return entry->out_set();
1402 else 1474 else
1403 return OutSet::empty(); 1475 return OutSet::empty();
1404 } 1476 }
1405 1477
1406 1478
1479 // -------------------------------------------------------------------
1480 // Analysis
1481
1482
1483 void EndNode::Analyze(AnalysisData* data) {
1484 // nothing to do
1485 }
1486
1487
1488 void ChoiceNode::Analyze(AnalysisData* incoming) {
1489 if (visited_) return;
1490 visited_ = true;
1491 ZoneList<GuardedAlternative>* choices = this->choices();
1492 SubAnalysisData data(incoming);
1493 data.set_table(table());
1494 for (int i = 0; i < choices->length(); i++) {
1495 data.set_choice_index(i);
1496 choices->at(i).node()->Analyze(&data);
1497 }
1498 visited_ = false;
1499 }
1500
1501
1502 void BackreferenceNode::Analyze(AnalysisData* data) {
1503 UNIMPLEMENTED();
1504 }
1505
1506
1507 void CharacterClassNode::Analyze(AnalysisData* data) {
1508 if (data->table() != NULL) {
1509 int index = data->choice_index();
1510 ZoneList<CharacterRange>* ranges = this->ranges();
1511 for (int i = 0; i < ranges->length(); i++) {
1512 CharacterRange range = ranges->at(i);
1513 data->table()->AddRange(range, index);
1514 }
1515 }
1516 SubAnalysisData outgoing(data);
1517 outgoing.set_table(NULL);
1518 on_success()->Analyze(&outgoing);
1519 }
1520
1521
1522 void AtomNode::Analyze(AnalysisData* data) {
1523 if (data->table() != NULL) {
1524 uc16 c = this->data()[0];
1525 data->table()->AddRange(CharacterRange(c, c), data->choice_index());
1526 }
1527 SubAnalysisData outgoing(data);
1528 outgoing.set_table(NULL);
1529 on_success()->Analyze(&outgoing);
1530 }
1531
1532
1533 void ActionNode::Analyze(AnalysisData* data) {
1534 on_success()->Analyze(data);
1535 }
1536
1537
1538 RegExpNode* RegExpCompiler::Compile(RegExpTree* tree,
1539 RegExpNode* on_success,
1540 RegExpNode* on_failure) {
1541 RegExpNode* node = tree->ToNode(this, on_success, on_failure);
1542 AnalysisData data(this);
1543 node->Analyze(&data);
1544 return node;
1545 }
1546
1547
1407 void RegExpEngine::DotPrint(const char* label, RegExpNode* node) { 1548 void RegExpEngine::DotPrint(const char* label, RegExpNode* node) {
1408 DotPrinter printer; 1549 DotPrinter printer;
1409 printer.PrintNode(label, node); 1550 printer.PrintNode(label, node);
1410 } 1551 }
1411 1552
1412 1553
1413 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { 1554 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
1414 RegExpCompiler compiler(input->capture_count); 1555 RegExpCompiler compiler(input->capture_count);
1415 RegExpNode* node = compiler.Compile(input->tree, 1556 RegExpNode* node = compiler.Compile(input->tree,
1416 EndNode::GetAccept(), 1557 EndNode::GetAccept(),
1417 EndNode::GetBacktrack()); 1558 EndNode::GetBacktrack());
1418 return node; 1559 return node;
1419 } 1560 }
1420 1561
1421 1562
1422 }} // namespace v8::internal 1563 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/jsregexp-inl.h » ('j') | test/cctest/test-regexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698