src/jsregexp.h - Issue 13343: More assertion propagation.

Unified Diff: src/jsregexp.h

Issue 13343: More assertion propagation. (Closed)

Patch Set: "Does it lint?" Created 12 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/jsregexp.h

diff --git a/src/jsregexp.h b/src/jsregexp.h

index 3dc2bc1d115bf34f5dbb1deea42523d6887ad08f..dbeb6e23ebedbef5be48e47bddc766d1b1b9fdb3 100644

--- a/src/jsregexp.h

+++ b/src/jsregexp.h

@@ -528,6 +528,12 @@ struct NodeInfo {

does_determine_start = that->does_determine_start;

}

+ bool HasLookbehind() {

+ return follows_word_interest ||

+ follows_newline_interest ||

+ follows_start_interest;

+ }

// Sets the interests of this node to include the interests of the

// following node.

void AddFromFollowing(NodeInfo* that) {

@@ -894,7 +900,7 @@ class ChoiceNode: public RegExpNode {

private:

friend class DispatchTableConstructor;

- friend class Analysis;

+ friend class AssertionPropagation;

void GenerateGuard(RegExpMacroAssembler* macro_assembler,

Guard *guard,

GenerationVariant* variant);

@@ -1052,9 +1058,45 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)

};

-class Analysis: public NodeVisitor {

+// Assertion propagation moves information about assertions such as

+// \b to the affected nodes. For instance, in /.\b./ information must

+// be propagated to the first '.' that whatever follows needs to know

+// if it matched a word or a non-word, and to the second '.' that it

+// has to check if it follows a word or non-word. In this case the

+// result will be something like:

+//

+// +-------+ +------------+

+// | . | | . |

+// +-------+ ---> +------------+

+// | word? | | check word |

+// +-------+ +------------+

+//

+// At a later phase all nodes that determine information for their

+// following nodes are split into several 'sibling' nodes. In this

+// case the first '.' is split into one node that only matches words

+// and one that only matches non-words. The second '.' is also split,

+// into one node that assumes that the previous character was a word

+// character and one that assumes that it was non-word. In this case

+// the result is

+//

+// +------------------+ +------------------+

+// /--> | intersect(., \w) | ---> | intersect(., \W) |

+// | +------------------+ +------------------+

+// | | follows \w |

+// | +------------------+

+// --?

+// | +------------------+ +------------------+

+// \--> | intersect(., \W) | ---> | intersect(., \w) |

+// +------------------+ +------------------+

+// | follows \W |

+// +------------------+

+//

+// This way we don't need to explicitly check the previous character

+// but can always assume that whoever consumed the previous character

+// has propagated the relevant information forward.

+class AssertionPropagation: public NodeVisitor {

public:

- explicit Analysis(bool ignore_case)

+ explicit AssertionPropagation(bool ignore_case)

: ignore_case_(ignore_case) { }

void EnsureAnalyzed(RegExpNode* node);

@@ -1066,12 +1108,20 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)

private:

bool ignore_case_;

- DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);

+ DISALLOW_IMPLICIT_CONSTRUCTORS(AssertionPropagation);

};

-struct RegExpParseResult {

+struct RegExpCompileData {

+ RegExpCompileData()

+ : tree(NULL),

+ node(NULL),

+ has_lookbehind(false),

+ has_character_escapes(false),

+ capture_count(0) { }

RegExpTree* tree;

+ RegExpNode* node;

+ bool has_lookbehind;

bool has_character_escapes;

Handle<String> error;

int capture_count;

@@ -1080,8 +1130,7 @@ struct RegExpParseResult {

class RegExpEngine: public AllStatic {

public:

- static Handle<FixedArray> Compile(RegExpParseResult* input,

- RegExpNode** node_return,

+ static Handle<FixedArray> Compile(RegExpCompileData* input,

bool ignore_case,

bool multiline,

Handle<String> pattern,

« no previous file with comments | « src/globals.h ('k') | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')