Chromium Code Reviews| Index: src/jsregexp.cc |
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc |
| index e06dd54096c01c3b13b8512ed638390d9c7fb7e5..176642ddbfbde81a67d0d0bbb34058c285cc8f34 100644 |
| --- a/src/jsregexp.cc |
| +++ b/src/jsregexp.cc |
| @@ -647,6 +647,36 @@ class DotPrinter { |
| }; |
| +// Data that is passed around during analysis. |
| +class AnalysisData { |
| + public: |
| + AnalysisData(RegExpCompiler* compiler) |
| + : compiler_(compiler), |
| + table_(NULL), |
| + choice_index_(-1) { } |
| + DispatchTable* table() { return table_; } |
| + RegExpCompiler* compiler() { return compiler_; } |
| + int choice_index() { return choice_index_; } |
| + protected: |
| + AnalysisData(AnalysisData* prev) { *this = *prev; } |
| + RegExpCompiler* compiler_; |
| + DispatchTable *table_; |
| + int choice_index_; |
| +}; |
| + |
| + |
| +// A subclass of analysis data that allows fields to be set. Anyone |
| +// who needs to create new analysis data creates an instance of this |
| +// class, configures it, and then passes it on as an AnalysisData |
| +// which doesn't allow its fields to be changed. |
| +class SubAnalysisData: public AnalysisData { |
|
Lasse Reichstein
2008/11/10 16:35:45
How about AnalysisDataBuilder?
SubAnalysisData do
Christian Plesner Hansen
2008/11/11 06:12:35
The 'Sub' was meant to convey that the result is a
|
| + public: |
| + SubAnalysisData(AnalysisData* prev) : AnalysisData(prev) { } |
| + void set_table(DispatchTable* value) { table_ = value; } |
| + void set_choice_index(int value) { choice_index_ = value; } |
| +}; |
| + |
| + |
| class RegExpCompiler { |
| public: |
| explicit RegExpCompiler(int capture_count) |
| @@ -654,9 +684,7 @@ class RegExpCompiler { |
| RegExpNode* Compile(RegExpTree* tree, |
| RegExpNode* on_success, |
| - RegExpNode* on_failure) { |
| - return tree->ToNode(this, on_success, on_failure); |
| - } |
| + RegExpNode* on_failure); |
| int AllocateRegister() { return next_register_++; } |
| @@ -669,6 +697,7 @@ class RegExpNode: public ZoneObject { |
| public: |
| virtual ~RegExpNode() { } |
| virtual void EmitDot(DotPrinter* out); |
| + virtual void Analyze(AnalysisData* data) = 0; |
| }; |
| @@ -686,6 +715,7 @@ class EndNode: public RegExpNode { |
| public: |
| enum Action { ACCEPT, BACKTRACK }; |
| virtual void EmitDot(DotPrinter* out); |
| + virtual void Analyze(AnalysisData* data); |
| static EndNode* GetAccept() { return &kAccept; } |
| static EndNode* GetBacktrack() { return &kBacktrack; } |
| private: |
| @@ -709,6 +739,7 @@ class AtomNode: public SeqRegExpNode { |
| on_failure_(on_failure), |
| data_(data) { } |
| virtual void EmitDot(DotPrinter* out); |
| + virtual void Analyze(AnalysisData* data); |
| Vector<const uc16> data() { return data_; } |
| RegExpNode* on_failure() { return on_failure_; } |
| private: |
| @@ -728,6 +759,7 @@ class BackreferenceNode: public SeqRegExpNode { |
| start_reg_(start_reg), |
| end_reg_(end_reg) { } |
| virtual void EmitDot(DotPrinter* out); |
| + virtual void Analyze(AnalysisData* data); |
| RegExpNode* on_failure() { return on_failure_; } |
| int start_register() { return start_reg_; } |
| int end_register() { return end_reg_; } |
| @@ -747,6 +779,7 @@ class CharacterClassNode: public SeqRegExpNode { |
| on_failure_(on_failure), |
| ranges_(ranges) { } |
| virtual void EmitDot(DotPrinter* out); |
| + virtual void Analyze(AnalysisData* data); |
| ZoneList<CharacterRange>* ranges() { return ranges_; } |
| RegExpNode* on_failure() { return on_failure_; } |
| private: |
| @@ -795,8 +828,10 @@ class ChoiceNode: public RegExpNode { |
| public: |
| explicit ChoiceNode(int expected_size, RegExpNode* on_failure) |
| : on_failure_(on_failure), |
| - choices_(new ZoneList<GuardedAlternative>(expected_size)) { } |
| + choices_(new ZoneList<GuardedAlternative>(expected_size)), |
| + visited_(false) { } |
|
Lasse Reichstein
2008/11/10 16:35:45
Why is it only ChoiceNode that has a visited field
Christian Plesner Hansen
2008/11/11 06:12:35
Yes, but it does no harm if you don't catch it unt
|
| virtual void EmitDot(DotPrinter* out); |
| + virtual void Analyze(AnalysisData* data); |
| void AddChild(GuardedAlternative node) { choices()->Add(node); } |
| ZoneList<GuardedAlternative>* choices() { return choices_; } |
| DispatchTable* table() { return &table_; } |
| @@ -805,6 +840,7 @@ class ChoiceNode: public RegExpNode { |
| RegExpNode* on_failure_; |
| ZoneList<GuardedAlternative>* choices_; |
| DispatchTable table_; |
| + bool visited_; |
| }; |
| @@ -850,6 +886,7 @@ class ActionNode: public SeqRegExpNode { |
| return new ActionNode(END_SUBMATCH, on_success); |
| } |
| virtual void EmitDot(DotPrinter* out); |
| + virtual void Analyze(AnalysisData* data); |
| Type type() { return type_; } |
| private: |
| ActionNode(Type type, RegExpNode* on_success) |
| @@ -919,7 +956,7 @@ static void PrintOnFailure(DotPrinter* out, |
| void ChoiceNode::EmitDot(DotPrinter* out) { |
| - out->stream()->Add(" n%p [label=\"?\", shape=circle];\n", this); |
| + out->stream()->Add(" n%p [label=\"? (%p)\"];\n", this, this); |
| PrintOnFailure(out, this, this->on_failure()); |
| for (int i = 0; i < choices()->length(); i++) { |
| GuardedAlternative alt = choices()->at(i); |
| @@ -946,6 +983,8 @@ void ChoiceNode::EmitDot(DotPrinter* out) { |
| out->stream()->Add("\"];\n"); |
| out->Visit(choices()->at(i).node()); |
| } |
| + fprintf(stderr, "--- %p ---\n", static_cast<void*>(this)); |
|
Lasse Reichstein
2008/11/10 16:35:45
Debug-print?
Christian Plesner Hansen
2008/11/11 06:12:35
It turns out I can use OS::PrintError. Also, I'll
|
| + table()->Dump(); |
| } |
| @@ -1392,6 +1431,39 @@ void DispatchTable::AddRange(CharacterRange full_range, int value) { |
| } |
| +class DispatchTableDumper { |
| + public: |
| + DispatchTableDumper(StringStream* stream) : stream_(stream) { } |
| + void Call(uc16 key, DispatchTable::Entry entry); |
| + StringStream* stream() { return stream_; } |
| + private: |
| + StringStream* stream_; |
| +}; |
| + |
| + |
| +void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) { |
| + stream()->Add("[%k-%k]: {", key, entry.from()); |
| + OutSet set = entry.out_set(); |
| + bool first = true; |
| + for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { |
| + if (set.Get(i)) { |
| + if (first) first = false; |
| + else stream()->Add(", "); |
| + stream()->Add("%i", i); |
| + } |
| + } |
| + stream()->Add("}\n"); |
| +} |
| + |
| + |
| +void DispatchTable::Dump() { |
| + HeapStringAllocator alloc; |
| + StringStream stream(&alloc); |
| + tree()->ForEach(DispatchTableDumper(&stream)); |
| + fprintf(stderr, "%s", *stream.ToCString()); |
| +} |
| + |
| + |
| OutSet DispatchTable::Get(uc16 value) { |
| ZoneSplayTree<Config>::Locator loc; |
| if (!tree()->FindGreatestLessThan(value, &loc)) |
| @@ -1404,6 +1476,75 @@ OutSet DispatchTable::Get(uc16 value) { |
| } |
| +// ------------------------------------------------------------------- |
| +// Analysis |
| + |
| + |
| +void EndNode::Analyze(AnalysisData* data) { |
| + // nothing to do |
| +} |
| + |
| + |
| +void ChoiceNode::Analyze(AnalysisData* incoming) { |
| + if (visited_) return; |
| + visited_ = true; |
| + ZoneList<GuardedAlternative>* choices = this->choices(); |
| + SubAnalysisData data(incoming); |
| + data.set_table(table()); |
| + for (int i = 0; i < choices->length(); i++) { |
| + data.set_choice_index(i); |
| + choices->at(i).node()->Analyze(&data); |
| + } |
| + visited_ = false; |
| +} |
| + |
| + |
| +void BackreferenceNode::Analyze(AnalysisData* data) { |
| + UNIMPLEMENTED(); |
| +} |
| + |
| + |
| +void CharacterClassNode::Analyze(AnalysisData* data) { |
| + if (data->table() != NULL) { |
| + int index = data->choice_index(); |
| + ZoneList<CharacterRange>* ranges = this->ranges(); |
| + for (int i = 0; i < ranges->length(); i++) { |
| + CharacterRange range = ranges->at(i); |
| + data->table()->AddRange(range, index); |
| + } |
| + } |
| + SubAnalysisData outgoing(data); |
| + outgoing.set_table(NULL); |
| + on_success()->Analyze(&outgoing); |
| +} |
| + |
| + |
| +void AtomNode::Analyze(AnalysisData* data) { |
| + if (data->table() != NULL) { |
| + uc16 c = this->data()[0]; |
| + data->table()->AddRange(CharacterRange(c, c), data->choice_index()); |
| + } |
| + SubAnalysisData outgoing(data); |
| + outgoing.set_table(NULL); |
| + on_success()->Analyze(&outgoing); |
| +} |
| + |
| + |
| +void ActionNode::Analyze(AnalysisData* data) { |
| + on_success()->Analyze(data); |
| +} |
| + |
| + |
| +RegExpNode* RegExpCompiler::Compile(RegExpTree* tree, |
| + RegExpNode* on_success, |
| + RegExpNode* on_failure) { |
| + RegExpNode* node = tree->ToNode(this, on_success, on_failure); |
| + AnalysisData data(this); |
| + node->Analyze(&data); |
| + return node; |
| +} |
| + |
| + |
| void RegExpEngine::DotPrint(const char* label, RegExpNode* node) { |
| DotPrinter printer; |
| printer.PrintNode(label, node); |