Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(258)

Unified Diff: src/jsregexp.h

Issue 13343: More assertion propagation. (Closed)
Patch Set: "Does it lint?" Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/globals.h ('k') | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.h
diff --git a/src/jsregexp.h b/src/jsregexp.h
index 3dc2bc1d115bf34f5dbb1deea42523d6887ad08f..dbeb6e23ebedbef5be48e47bddc766d1b1b9fdb3 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -528,6 +528,12 @@ struct NodeInfo {
does_determine_start = that->does_determine_start;
}
+ bool HasLookbehind() {
+ return follows_word_interest ||
+ follows_newline_interest ||
+ follows_start_interest;
+ }
+
// Sets the interests of this node to include the interests of the
// following node.
void AddFromFollowing(NodeInfo* that) {
@@ -894,7 +900,7 @@ class ChoiceNode: public RegExpNode {
private:
friend class DispatchTableConstructor;
- friend class Analysis;
+ friend class AssertionPropagation;
void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard,
GenerationVariant* variant);
@@ -1052,9 +1058,45 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
};
-class Analysis: public NodeVisitor {
+// Assertion propagation moves information about assertions such as
+// \b to the affected nodes. For instance, in /.\b./ the first '.'
+// must be told that whatever follows needs to know whether it matched
+// a word or a non-word character, and the second '.' must be told to
+// check whether it follows a word or a non-word character. In this
+// case the result will be something like:
+//
+// +-------+ +------------+
+// | . | | . |
+// +-------+ ---> +------------+
+// | word? | | check word |
+// +-------+ +------------+
+//
+// At a later phase all nodes that determine information for their
+// following nodes are split into several 'sibling' nodes. In this
+// case the first '.' is split into one node that only matches words
+// and one that only matches non-words. The second '.' is also split,
+// into one node that assumes that the previous character was a word
+// character and one that assumes that it was non-word. In this case
+// the result is
+//
+// +------------------+ +------------------+
+// /--> | intersect(., \w) | ---> | intersect(., \W) |
+// | +------------------+ +------------------+
+// | | follows \w |
+// | +------------------+
+// --?
+// | +------------------+ +------------------+
+// \--> | intersect(., \W) | ---> | intersect(., \w) |
+// +------------------+ +------------------+
+// | follows \W |
+// +------------------+
+//
+// This way we don't need to explicitly check the previous character
+// but can always assume that whoever consumed the previous character
+// has propagated the relevant information forward.
+class AssertionPropagation: public NodeVisitor {
public:
- explicit Analysis(bool ignore_case)
+ explicit AssertionPropagation(bool ignore_case)
: ignore_case_(ignore_case) { }
void EnsureAnalyzed(RegExpNode* node);
@@ -1066,12 +1108,20 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
private:
bool ignore_case_;
- DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(AssertionPropagation);
};
-struct RegExpParseResult {
+struct RegExpCompileData {
+ RegExpCompileData()
+ : tree(NULL),
+ node(NULL),
+ has_lookbehind(false),
+ has_character_escapes(false),
+ capture_count(0) { }
RegExpTree* tree;
+ RegExpNode* node;
+ bool has_lookbehind;
bool has_character_escapes;
Handle<String> error;
int capture_count;
@@ -1080,8 +1130,7 @@ struct RegExpParseResult {
class RegExpEngine: public AllStatic {
public:
- static Handle<FixedArray> Compile(RegExpParseResult* input,
- RegExpNode** node_return,
+ static Handle<FixedArray> Compile(RegExpCompileData* input,
bool ignore_case,
bool multiline,
Handle<String> pattern,
« no previous file with comments | « src/globals.h ('k') | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698