Index: src/jsregexp.h |
=================================================================== |
--- src/jsregexp.h (revision 11190) |
+++ src/jsregexp.h (working copy) |
@@ -190,8 +190,10 @@ |
static String* last_ascii_string_; |
static String* two_byte_cached_string_; |
- static bool CompileIrregexp(Handle<JSRegExp> re, bool is_ascii); |
- static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); |
+ static bool CompileIrregexp( |
+ Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
+ static inline bool EnsureCompiledIrregexp( |
+ Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
// Set the subject cache. The previous string buffer is not deleted, so the |
@@ -419,6 +421,55 @@ |
}; |
+class BoyerMooreLookahead { |
+ public: |
+ BoyerMooreLookahead(int length, int map_length, RegExpCompiler* compiler); |
ulan
2012/03/30 16:54:51
We need comments for this class. In particular for
|
+ |
+ int length() { return length_; } |
+ int max_char() { return max_char_; } |
+ RegExpCompiler* compiler() { return compiler_; } |
+ |
+ static const int kTooManyCharacters = 32; |
+ |
+ int Count(int map_number) { |
+ ZoneList<bool>* map = bitmaps_->at(map_number); |
+ if (map == NULL) return map_length_; |
+ int count = 0; |
+ for (int i = 0; i < map_length_; i++) { |
+ if (map->at(i)) count++; |
+ } |
+ return count; |
+ } |
+ |
+ void Set(int map_number, int character) { |
+ if (character > max_char_) return; |
+ ZoneList<bool>* map = bitmaps_->at(map_number); |
+ if (map == NULL) return; |
+ map->at(character & (map_length_ - 1)) = true; |
+ } |
+ |
+ void SetAll(int map_number) { |
+ bitmaps_->at(map_number) = NULL; |
+ } |
+ |
+ void SetRest(int from_map) { |
+ for (int i = from_map; i < length_; i++) SetAll(i); |
+ } |
+ bool EmitSkipInstructions(RegExpMacroAssembler* masm); |
+ |
+ private: |
+ int length_; |
+ int map_length_; |
+ RegExpCompiler* compiler_; |
+ int max_char_; |
+ ZoneList<ZoneList<bool>*>* bitmaps_; |
+ |
+ bool TooVague(int offset, int limit); |
+ int GetSkipTable(int max_lookahead, Handle<ByteArray> boolean_skip_table); |
+ bool FindWorthwhileInterval(int* from, int* to); |
+}; |
+ |
+ |
#define FOR_EACH_NODE_TYPE(VISIT) \ |
VISIT(End) \ |
VISIT(Action) \ |
@@ -635,6 +686,22 @@ |
bool not_at_start) = 0; |
static const int kNodeIsTooComplexForGreedyLoops = -1; |
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
+ // Only returns the successor for a text node of length 1 that matches any |
+ // character and that has no guards on it. |
+ virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
+ RegExpCompiler* compiler) { |
+ return NULL; |
+ } |
+ |
+ // Collects information on the possible code units (mod 128) that can match if |
+ // we look forward. This is used for a Boyer-Moore-like string searching |
+ // implementation. TODO(erikcorry): This should share more code with |
+ // EatsAtLeast, GetQuickCheckDetails and ComputeFirstCharacterSet. |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
+ UNREACHABLE(); |
+ } |
+ |
Label* label() { return &label_; } |
// If non-generic code is generated for a node (i.e. the node is not at the |
// start of the trace) then it cannot be reused. This variable sets a limit |
@@ -747,6 +814,10 @@ |
: on_success_(on_success) { } |
RegExpNode* on_success() { return on_success_; } |
void set_on_success(RegExpNode* node) { on_success_ = node; } |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
+ on_success_->FillInBMInfo(offset, bm, not_at_start); |
+ } |
private: |
RegExpNode* on_success_; |
}; |
@@ -793,6 +864,8 @@ |
return on_success()->GetQuickCheckDetails( |
details, compiler, filled_in, not_at_start); |
} |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start); |
Type type() { return type_; } |
// TODO(erikcorry): We should allow some action nodes in greedy loops. |
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
@@ -860,6 +933,10 @@ |
ZoneList<TextElement>* elements() { return elms_; } |
void MakeCaseIndependent(bool is_ascii); |
virtual int GreedyLoopTextLength(); |
+ virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
+ RegExpCompiler* compiler); |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start); |
virtual TextNode* Clone() { |
TextNode* result = new TextNode(*this); |
result->CalculateOffsets(); |
@@ -928,6 +1005,8 @@ |
RegExpCompiler* compiler, |
int filled_in, |
bool not_at_start); |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start); |
virtual int ComputeFirstCharacterSet(int budget); |
virtual AssertionNode* Clone() { return new AssertionNode(*this); } |
AssertionNodeType type() { return type_; } |
@@ -961,6 +1040,12 @@ |
bool not_at_start) { |
return; |
} |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
+ // Working out the set of characters that a backreference can match is too |
+ // hard, so we just say that any character can match. |
+ bm->SetRest(offset); |
+ } |
virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } |
virtual int ComputeFirstCharacterSet(int budget); |
@@ -986,6 +1071,11 @@ |
// Returning 0 from EatsAtLeast should ensure we never get here. |
UNREACHABLE(); |
} |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
+ // Returning 0 from EatsAtLeast should ensure we never get here. |
+ UNREACHABLE(); |
+ } |
virtual EndNode* Clone() { return new EndNode(*this); } |
private: |
Action action_; |
@@ -1071,6 +1161,8 @@ |
RegExpCompiler* compiler, |
int characters_filled_in, |
bool not_at_start); |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start); |
virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } |
bool being_calculated() { return being_calculated_; } |
@@ -1089,7 +1181,7 @@ |
void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
Guard* guard, |
Trace* trace); |
- int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start); |
+ int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least); |
void EmitOutOfLineContinuation(RegExpCompiler* compiler, |
Trace* trace, |
GuardedAlternative alternative, |
@@ -1119,6 +1211,10 @@ |
RegExpCompiler* compiler, |
int characters_filled_in, |
bool not_at_start); |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
+ alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); |
+ } |
// For a negative lookahead we don't emit the quick check for the |
// alternative that is expected to fail. This is because quick check code |
// starts by loading enough characters for the alternative that takes fewest |
@@ -1146,6 +1242,8 @@ |
RegExpCompiler* compiler, |
int characters_filled_in, |
bool not_at_start); |
+ virtual void FillInBMInfo( |
+ int offset, BoyerMooreLookahead* bm, bool not_at_start); |
virtual int ComputeFirstCharacterSet(int budget); |
virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } |
RegExpNode* loop_node() { return loop_node_; } |
@@ -1458,6 +1556,7 @@ |
bool ignore_case, |
bool multiline, |
Handle<String> pattern, |
+ Handle<String> sample_subject, |
bool is_ascii); |
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |