Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(409)

Unified Diff: src/jsregexp.h

Issue 13014: Rudimentary assertion expansion (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/jsregexp.h
diff --git a/src/jsregexp.h b/src/jsregexp.h
index a691b127aaa8560b256432a826de4ea5d5fae26f..38357d670ce86ce3aee98c27ec81f5db3046af46 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -185,6 +185,7 @@ class CharacterRange {
CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
+ static Vector<const uc16> GetWordBounds();
static inline CharacterRange Singleton(uc16 value) {
return CharacterRange(value, value);
}
@@ -203,9 +204,15 @@ class CharacterRange {
bool is_valid() { return from_ <= to_; }
bool IsSingleton() { return (from_ == to_); }
void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
+ static void Split(ZoneList<CharacterRange>* base,
+ Vector<const uc16> overlay,
+ ZoneList<CharacterRange>** included,
+ ZoneList<CharacterRange>** excluded);
+
static const int kRangeCanonicalizeMax = 0x346;
static const int kStartMarker = (1 << 24);
static const int kPayloadMask = (1 << 24) - 1;
+
private:
uc16 from_;
uc16 to_;
@@ -344,7 +351,7 @@ class OutSet: public ZoneObject {
// A mapping from integers, specified as ranges, to a set of integers.
// Used for mapping character ranges to choices.
-class DispatchTable {
+class DispatchTable : public ZoneObject {
public:
class Entry {
public:
@@ -437,29 +444,50 @@ class TextElement {
struct NodeInfo {
- enum PrecedingInfo {
+ enum TriBool {
Lasse Reichstein 2008/12/01 11:55:47 Not really a better name. Not that I can think of
UNKNOWN = -1, FALSE = 0, TRUE = 1
};
NodeInfo()
: being_analyzed(false),
been_analyzed(false),
+ being_expanded(false),
+ been_expanded(false),
determine_word(false),
determine_newline(false),
determine_start(false),
+ does_determine_word(false),
+ does_determine_newline(false),
+ does_determine_start(false),
follows_word_interest(false),
follows_newline_interest(false),
follows_start_interest(false),
+ is_word(UNKNOWN),
+ is_newline(UNKNOWN),
at_end(false),
follows_word(UNKNOWN),
follows_newline(UNKNOWN),
follows_start(UNKNOWN) { }
- bool HasSameForwardInterests(NodeInfo* that) {
- return (at_end == that->at_end)
- && (follows_word_interest == that->follows_word_interest)
- && (follows_newline_interest == that->follows_newline_interest)
- && (follows_start_interest == that->follows_start_interest);
+ // Returns true if the interests and assumptions of this node
+ // matches the given one.
+ bool Matches(NodeInfo* that) {
+ return (at_end == that->at_end) &&
+ (follows_word_interest == that->follows_word_interest) &&
+ (follows_newline_interest == that->follows_newline_interest) &&
+ (follows_start_interest == that->follows_start_interest) &&
+ (follows_word == that->follows_word) &&
+ (follows_newline == that->follows_newline) &&
+ (follows_start == that->follows_start) &&
+ (does_determine_word == that->does_determine_word) &&
+ (does_determine_newline == that->does_determine_newline) &&
+ (does_determine_start == that->does_determine_start);
+ }
+
+ bool HasAssertions() {
+ return (follows_word != UNKNOWN) ||
+ (follows_newline != UNKNOWN) ||
+ (follows_start != UNKNOWN);
}
// Updates the interests of this node given the interests of the
@@ -471,6 +499,26 @@ struct NodeInfo {
follows_start_interest |= that->follows_start_interest;
}
+ void AddAssumptions(NodeInfo* that) {
+ if (that->follows_word != UNKNOWN) {
+ ASSERT(follows_word == UNKNOWN || follows_word == that->follows_word);
+ follows_word = that->follows_word;
+ }
+ if (that->follows_newline != UNKNOWN) {
+ ASSERT(follows_newline == UNKNOWN ||
+ follows_newline == that->follows_newline);
+ follows_newline = that->follows_newline;
+ }
+ if (that->follows_start != UNKNOWN) {
+ ASSERT(follows_start == UNKNOWN ||
+ follows_start == that->follows_start);
+ follows_start = that->follows_start;
+ }
+ does_determine_word = that->does_determine_word;
+ does_determine_newline = that->does_determine_newline;
+ does_determine_start = that->does_determine_start;
+ }
+
// Sets the interests of this node to include the interests of the
// following node.
void AddFromFollowing(NodeInfo* that) {
@@ -479,8 +527,17 @@ struct NodeInfo {
follows_start_interest |= that->follows_start_interest;
}
+ void ResetCompilationState() {
+ being_analyzed = false;
+ been_analyzed = false;
+ being_expanded = false;
+ been_expanded = false;
+ }
+
bool being_analyzed: 1;
bool been_analyzed: 1;
+ bool being_expanded: 1;
+ bool been_expanded: 1;
// These bits are set if this node must propagate forward information
// about the last character it consumed (or, in the case of 'start',
@@ -489,19 +546,40 @@ struct NodeInfo {
bool determine_newline: 1;
bool determine_start: 1;
+ bool does_determine_word: 1;
+ bool does_determine_newline: 1;
+ bool does_determine_start: 1;
+
// These bits are set of this node has to know what the preceding
// character was.
bool follows_word_interest: 1;
bool follows_newline_interest: 1;
bool follows_start_interest: 1;
+ TriBool is_word: 2;
+ TriBool is_newline: 2;
+
bool at_end: 1;
// These bits are set if the node can make assumptions about what
// the previous character was.
- PrecedingInfo follows_word: 2;
- PrecedingInfo follows_newline: 2;
- PrecedingInfo follows_start: 2;
+ TriBool follows_word: 2;
+ TriBool follows_newline: 2;
+ TriBool follows_start: 2;
+};
+
+
+class ExpansionGuard {
+ public:
+ explicit inline ExpansionGuard(NodeInfo* info) : info_(info) {
+ ASSERT(!info->being_expanded);
+ info->being_expanded = true;
+ }
+ inline ~ExpansionGuard() {
+ info_->being_expanded = false;
+ }
+ private:
+ NodeInfo* info_;
};
@@ -538,6 +616,10 @@ class RegExpNode: public ZoneObject {
// false for failure.
virtual bool Emit(RegExpCompiler* compiler) = 0;
+ RegExpNode* EnsureExpanded(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0;
+ virtual void ExpandChildren() = 0;
+
// Propagates the given interest information forward. When seeing
// \bfoo for instance, the \b is implemented by propagating forward
// to the 'foo' string that it should only succeed if its first
@@ -546,11 +628,40 @@ class RegExpNode: public ZoneObject {
NodeInfo* info() { return &info_; }
virtual bool IsBacktrack() { return false; }
- RegExpNode* GetSibling(NodeInfo* info);
- void EnsureSiblings() { siblings_.Ensure(this); }
+
void AddSibling(RegExpNode* node) { siblings_.Add(node); }
+
+ // Static version of EnsureSibling that expresses the fact that the
+ // result has the same type as the input.
+ template <class C>
+ static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) {
+ return static_cast<C*>(node->EnsureSibling(info, cloned));
+ }
+
+ SiblingList* siblings() { return &siblings_; }
+ void set_siblings(SiblingList* other) { siblings_ = *other; }
+
protected:
+
+ // Returns a sibling of this node whose interests and assumptions
+ // match the ones in the given node info. If no sibling exists NULL
+ // is returned.
+ RegExpNode* TryGetSibling(NodeInfo* info);
+
+ // Returns a sibling of this node whose interests match the ones in
+ // the given node info. The info must not contain any assertions.
+ // If no node exists a new one will be created by cloning the current
+ // node. The result will always be an instance of the same concrete
+ // class as this node.
+ RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned);
+
+ // Returns a clone of this node initialized using the copy constructor
+ // of its concrete class. Note that the node may have to be pre-
+ // processed before it is on a useable state.
+ virtual RegExpNode* Clone() = 0;
+
inline void Bind(RegExpMacroAssembler* macro);
+
private:
Label label_;
NodeInfo info_;
@@ -593,7 +704,11 @@ class ActionNode: public SeqRegExpNode {
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual ActionNode* Clone() { return new ActionNode(*this); }
+
private:
union {
struct {
@@ -627,13 +742,28 @@ class TextNode: public SeqRegExpNode {
: SeqRegExpNode(on_success),
on_failure_(on_failure),
elms_(elms) { }
+ TextNode(RegExpCharacterClass* that,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
+ elms_(new ZoneList<TextElement>(1)) {
+ elms_->Add(TextElement::CharClass(that));
+ }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent();
+ virtual TextNode* Clone() { return new TextNode(*this); }
+
private:
+ void ExpandAtomChildren(RegExpAtom* that);
+ void ExpandCharClassChildren(RegExpCharacterClass* that);
+
RegExpNode* on_failure_;
ZoneList<TextElement>* elms_;
};
@@ -655,6 +785,10 @@ class BackReferenceNode: public SeqRegExpNode {
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
+ virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
+
private:
RegExpNode* on_failure_;
int start_reg_;
@@ -669,8 +803,12 @@ class EndNode: public RegExpNode {
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
virtual bool GoTo(RegExpCompiler* compiler);
+ virtual EndNode* Clone() { return new EndNode(*this); }
+
private:
Action action_;
};
@@ -686,6 +824,7 @@ class Guard: public ZoneObject {
int reg() { return reg_; }
Relation op() { return op_; }
int value() { return value_; }
+
private:
int reg_;
Relation op_;
@@ -700,6 +839,7 @@ class GuardedAlternative {
RegExpNode* node() { return node_; }
void set_node(RegExpNode* node) { node_ = node; }
ZoneList<Guard*>* guards() { return guards_; }
+
private:
RegExpNode* node_;
ZoneList<Guard*>* guards_;
@@ -711,27 +851,31 @@ class ChoiceNode: public RegExpNode {
explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
: on_failure_(on_failure),
alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
- table_calculated_(false),
+ table_(NULL),
being_calculated_(false) { }
virtual void Accept(NodeVisitor* visitor);
void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
- DispatchTable* table() { return &table_; }
+ DispatchTable* GetTable(bool ignore_case);
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
- bool table_calculated() { return table_calculated_; }
- void set_table_calculated(bool b) { table_calculated_ = b; }
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
+ virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
+
bool being_calculated() { return being_calculated_; }
void set_being_calculated(bool b) { being_calculated_ = b; }
+
private:
+ friend class DispatchTableConstructor;
+ friend class Analysis;
void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard,
Label* on_failure);
RegExpNode* on_failure_;
ZoneList<GuardedAlternative>* alternatives_;
- DispatchTable table_;
- bool table_calculated_;
+ DispatchTable* table_;
bool being_calculated_;
};
@@ -750,9 +894,10 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
// dispatch table of a choice node.
class DispatchTableConstructor: public NodeVisitor {
public:
- explicit DispatchTableConstructor(DispatchTable* table)
+ DispatchTableConstructor(DispatchTable* table, bool ignore_case)
: table_(table),
- choice_index_(-1) { }
+ choice_index_(-1),
+ ignore_case_(ignore_case) { }
void BuildTable(ChoiceNode* node);
@@ -773,6 +918,7 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
protected:
DispatchTable *table_;
int choice_index_;
+ bool ignore_case_;
};
@@ -808,7 +954,7 @@ class RegExpEngine: public AllStatic {
RegExpNode** node_return,
bool ignore_case,
bool multiline);
- static void DotPrint(const char* label, RegExpNode* node);
+ static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};
« no previous file with comments | « src/char-predicates-inl.h ('k') | src/jsregexp.cc » ('j') | test/cctest/test-regexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698