Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(330)

Unified Diff: src/jsregexp.cc

Issue 10254: Rudimentary analysis (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/jsregexp.cc
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index e06dd54096c01c3b13b8512ed638390d9c7fb7e5..176642ddbfbde81a67d0d0bbb34058c285cc8f34 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -647,6 +647,36 @@ class DotPrinter {
};
+// Data that is passed around during analysis.
+class AnalysisData {
+ public:
+ AnalysisData(RegExpCompiler* compiler)
+ : compiler_(compiler),
+ table_(NULL),
+ choice_index_(-1) { }
+ DispatchTable* table() { return table_; }
+ RegExpCompiler* compiler() { return compiler_; }
+ int choice_index() { return choice_index_; }
+ protected:
+ AnalysisData(AnalysisData* prev) { *this = *prev; }
+ RegExpCompiler* compiler_;
+ DispatchTable *table_;
+ int choice_index_;
+};
+
+
+// A subclass of analysis data that allows fields to be set. Anyone
+// who needs to create new analysis data creates an instance of this
+// class, configures it, and then passes it on as an AnalysisData
+// which doesn't allow its fields to be changed.
+class SubAnalysisData: public AnalysisData {
Lasse Reichstein 2008/11/10 16:35:45 How about AnalysisDataBuilder? SubAnalysisData do
Christian Plesner Hansen 2008/11/11 06:12:35 The 'Sub' was meant to convey that the result is a
+ public:
+ SubAnalysisData(AnalysisData* prev) : AnalysisData(prev) { }
+ void set_table(DispatchTable* value) { table_ = value; }
+ void set_choice_index(int value) { choice_index_ = value; }
+};
+
+
class RegExpCompiler {
public:
explicit RegExpCompiler(int capture_count)
@@ -654,9 +684,7 @@ class RegExpCompiler {
RegExpNode* Compile(RegExpTree* tree,
RegExpNode* on_success,
- RegExpNode* on_failure) {
- return tree->ToNode(this, on_success, on_failure);
- }
+ RegExpNode* on_failure);
int AllocateRegister() { return next_register_++; }
@@ -669,6 +697,7 @@ class RegExpNode: public ZoneObject {
public:
virtual ~RegExpNode() { }
virtual void EmitDot(DotPrinter* out);
+ virtual void Analyze(AnalysisData* data) = 0;
};
@@ -686,6 +715,7 @@ class EndNode: public RegExpNode {
public:
enum Action { ACCEPT, BACKTRACK };
virtual void EmitDot(DotPrinter* out);
+ virtual void Analyze(AnalysisData* data);
static EndNode* GetAccept() { return &kAccept; }
static EndNode* GetBacktrack() { return &kBacktrack; }
private:
@@ -709,6 +739,7 @@ class AtomNode: public SeqRegExpNode {
on_failure_(on_failure),
data_(data) { }
virtual void EmitDot(DotPrinter* out);
+ virtual void Analyze(AnalysisData* data);
Vector<const uc16> data() { return data_; }
RegExpNode* on_failure() { return on_failure_; }
private:
@@ -728,6 +759,7 @@ class BackreferenceNode: public SeqRegExpNode {
start_reg_(start_reg),
end_reg_(end_reg) { }
virtual void EmitDot(DotPrinter* out);
+ virtual void Analyze(AnalysisData* data);
RegExpNode* on_failure() { return on_failure_; }
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
@@ -747,6 +779,7 @@ class CharacterClassNode: public SeqRegExpNode {
on_failure_(on_failure),
ranges_(ranges) { }
virtual void EmitDot(DotPrinter* out);
+ virtual void Analyze(AnalysisData* data);
ZoneList<CharacterRange>* ranges() { return ranges_; }
RegExpNode* on_failure() { return on_failure_; }
private:
@@ -795,8 +828,10 @@ class ChoiceNode: public RegExpNode {
public:
explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
: on_failure_(on_failure),
- choices_(new ZoneList<GuardedAlternative>(expected_size)) { }
+ choices_(new ZoneList<GuardedAlternative>(expected_size)),
+ visited_(false) { }
Lasse Reichstein 2008/11/10 16:35:45 Why is it only ChoiceNode that has a visited field
Christian Plesner Hansen 2008/11/11 06:12:35 Yes, but it does no harm if you don't catch it unt
virtual void EmitDot(DotPrinter* out);
+ virtual void Analyze(AnalysisData* data);
void AddChild(GuardedAlternative node) { choices()->Add(node); }
ZoneList<GuardedAlternative>* choices() { return choices_; }
DispatchTable* table() { return &table_; }
@@ -805,6 +840,7 @@ class ChoiceNode: public RegExpNode {
RegExpNode* on_failure_;
ZoneList<GuardedAlternative>* choices_;
DispatchTable table_;
+ bool visited_;
};
@@ -850,6 +886,7 @@ class ActionNode: public SeqRegExpNode {
return new ActionNode(END_SUBMATCH, on_success);
}
virtual void EmitDot(DotPrinter* out);
+ virtual void Analyze(AnalysisData* data);
Type type() { return type_; }
private:
ActionNode(Type type, RegExpNode* on_success)
@@ -919,7 +956,7 @@ static void PrintOnFailure(DotPrinter* out,
void ChoiceNode::EmitDot(DotPrinter* out) {
- out->stream()->Add(" n%p [label=\"?\", shape=circle];\n", this);
+ out->stream()->Add(" n%p [label=\"? (%p)\"];\n", this, this);
PrintOnFailure(out, this, this->on_failure());
for (int i = 0; i < choices()->length(); i++) {
GuardedAlternative alt = choices()->at(i);
@@ -946,6 +983,8 @@ void ChoiceNode::EmitDot(DotPrinter* out) {
out->stream()->Add("\"];\n");
out->Visit(choices()->at(i).node());
}
+ fprintf(stderr, "--- %p ---\n", static_cast<void*>(this));
Lasse Reichstein 2008/11/10 16:35:45 Debug-print?
Christian Plesner Hansen 2008/11/11 06:12:35 It turns out I can use OS::PrintError. Also, I'll
+ table()->Dump();
}
@@ -1392,6 +1431,39 @@ void DispatchTable::AddRange(CharacterRange full_range, int value) {
}
+class DispatchTableDumper {
+ public:
+ DispatchTableDumper(StringStream* stream) : stream_(stream) { }
+ void Call(uc16 key, DispatchTable::Entry entry);
+ StringStream* stream() { return stream_; }
+ private:
+ StringStream* stream_;
+};
+
+
+void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
+ stream()->Add("[%k-%k]: {", key, entry.from());
+ OutSet set = entry.out_set();
+ bool first = true;
+ for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
+ if (set.Get(i)) {
+ if (first) first = false;
+ else stream()->Add(", ");
+ stream()->Add("%i", i);
+ }
+ }
+ stream()->Add("}\n");
+}
+
+
+void DispatchTable::Dump() {
+ HeapStringAllocator alloc;
+ StringStream stream(&alloc);
+ tree()->ForEach(DispatchTableDumper(&stream));
+ fprintf(stderr, "%s", *stream.ToCString());
+}
+
+
OutSet DispatchTable::Get(uc16 value) {
ZoneSplayTree<Config>::Locator loc;
if (!tree()->FindGreatestLessThan(value, &loc))
@@ -1404,6 +1476,75 @@ OutSet DispatchTable::Get(uc16 value) {
}
+// -------------------------------------------------------------------
+// Analysis
+
+
+void EndNode::Analyze(AnalysisData* data) {
+ // nothing to do
+}
+
+
+void ChoiceNode::Analyze(AnalysisData* incoming) {
+ if (visited_) return;
+ visited_ = true;
+ ZoneList<GuardedAlternative>* choices = this->choices();
+ SubAnalysisData data(incoming);
+ data.set_table(table());
+ for (int i = 0; i < choices->length(); i++) {
+ data.set_choice_index(i);
+ choices->at(i).node()->Analyze(&data);
+ }
+ visited_ = false;
+}
+
+
+void BackreferenceNode::Analyze(AnalysisData* data) {
+ UNIMPLEMENTED();
+}
+
+
+void CharacterClassNode::Analyze(AnalysisData* data) {
+ if (data->table() != NULL) {
+ int index = data->choice_index();
+ ZoneList<CharacterRange>* ranges = this->ranges();
+ for (int i = 0; i < ranges->length(); i++) {
+ CharacterRange range = ranges->at(i);
+ data->table()->AddRange(range, index);
+ }
+ }
+ SubAnalysisData outgoing(data);
+ outgoing.set_table(NULL);
+ on_success()->Analyze(&outgoing);
+}
+
+
+void AtomNode::Analyze(AnalysisData* data) {
+ if (data->table() != NULL) {
+ uc16 c = this->data()[0];
+ data->table()->AddRange(CharacterRange(c, c), data->choice_index());
+ }
+ SubAnalysisData outgoing(data);
+ outgoing.set_table(NULL);
+ on_success()->Analyze(&outgoing);
+}
+
+
+void ActionNode::Analyze(AnalysisData* data) {
+ on_success()->Analyze(data);
+}
+
+
+RegExpNode* RegExpCompiler::Compile(RegExpTree* tree,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ RegExpNode* node = tree->ToNode(this, on_success, on_failure);
+ AnalysisData data(this);
+ node->Analyze(&data);
+ return node;
+}
+
+
void RegExpEngine::DotPrint(const char* label, RegExpNode* node) {
DotPrinter printer;
printer.PrintNode(label, node);
« no previous file with comments | « src/jsregexp.h ('k') | src/jsregexp-inl.h » ('j') | test/cctest/test-regexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698