Chromium Code Reviews| Index: src/jsregexp.h |
| =================================================================== |
| --- src/jsregexp.h (revision 11190) |
| +++ src/jsregexp.h (working copy) |
| @@ -190,8 +190,10 @@ |
| static String* last_ascii_string_; |
| static String* two_byte_cached_string_; |
| - static bool CompileIrregexp(Handle<JSRegExp> re, bool is_ascii); |
| - static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); |
| + static bool CompileIrregexp( |
| + Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
| + static inline bool EnsureCompiledIrregexp( |
| + Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
| // Set the subject cache. The previous string buffer is not deleted, so the |
| @@ -419,6 +421,55 @@ |
| }; |
| +class BoyerMooreLookahead { |
| + public: |
| + BoyerMooreLookahead(int length, int map_length, RegExpCompiler* compiler); |
|
ulan
2012/03/30 16:54:51
We need comments for this class. In particular for
|
| + |
| + int length() { return length_; } |
| + int max_char() { return max_char_; } |
| + RegExpCompiler* compiler() { return compiler_; } |
| + |
| + static const int kTooManyCharacters = 32; |
| + |
| + int Count(int map_number) { |
| + ZoneList<bool>* map = bitmaps_->at(map_number); |
| + if (map == NULL) return map_length_; |
| + int count = 0; |
| + for (int i = 0; i < map_length_; i++) { |
| + if (map->at(i)) count++; |
| + } |
| + return count; |
| + } |
| + |
| + void Set(int map_number, int character) { |
| + if (character > max_char_) return; |
| + ZoneList<bool>* map = bitmaps_->at(map_number); |
| + if (map == NULL) return; |
| + map->at(character & (map_length_ - 1)) = true; |
| + } |
| + |
| + void SetAll(int map_number) { |
| + bitmaps_->at(map_number) = NULL; |
| + } |
| + |
| + void SetRest(int from_map) { |
| + for (int i = from_map; i < length_; i++) SetAll(i); |
| + } |
| + bool EmitSkipInstructions(RegExpMacroAssembler* masm); |
| + |
| + private: |
| + int length_; |
| + int map_length_; |
| + RegExpCompiler* compiler_; |
| + int max_char_; |
| + ZoneList<ZoneList<bool>*>* bitmaps_; |
| + |
| + bool TooVague(int offset, int limit); |
| + int GetSkipTable(int max_lookahead, Handle<ByteArray> boolean_skip_table); |
| + bool FindWorthwhileInterval(int* from, int* to); |
| +}; |
| + |
| + |
| #define FOR_EACH_NODE_TYPE(VISIT) \ |
| VISIT(End) \ |
| VISIT(Action) \ |
| @@ -635,6 +686,22 @@ |
| bool not_at_start) = 0; |
| static const int kNodeIsTooComplexForGreedyLoops = -1; |
| virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| + // Only returns the successor for a text node of length 1 that matches any |
| + // character and that has no guards on it. |
| + virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| + RegExpCompiler* compiler) { |
| + return NULL; |
| + } |
| + |
| + // Collects information on the possible code units (mod 128) that can match if |
| + // we look forward. This is used for a Boyer-Moore-like string searching |
| + // implementation. TODO(erikcorry): This should share more code with |
| + // EatsAtLeast, GetQuickCheckDetails and ComputeFirstCharacterSet. |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| + UNREACHABLE(); |
| + } |
| + |
| Label* label() { return &label_; } |
| // If non-generic code is generated for a node (i.e. the node is not at the |
| // start of the trace) then it cannot be reused. This variable sets a limit |
| @@ -747,6 +814,10 @@ |
| : on_success_(on_success) { } |
| RegExpNode* on_success() { return on_success_; } |
| void set_on_success(RegExpNode* node) { on_success_ = node; } |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| + on_success_->FillInBMInfo(offset, bm, not_at_start); |
| + } |
| private: |
| RegExpNode* on_success_; |
| }; |
| @@ -793,6 +864,8 @@ |
| return on_success()->GetQuickCheckDetails( |
| details, compiler, filled_in, not_at_start); |
| } |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| Type type() { return type_; } |
| // TODO(erikcorry): We should allow some action nodes in greedy loops. |
| virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| @@ -860,6 +933,10 @@ |
| ZoneList<TextElement>* elements() { return elms_; } |
| void MakeCaseIndependent(bool is_ascii); |
| virtual int GreedyLoopTextLength(); |
| + virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| + RegExpCompiler* compiler); |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| virtual TextNode* Clone() { |
| TextNode* result = new TextNode(*this); |
| result->CalculateOffsets(); |
| @@ -928,6 +1005,8 @@ |
| RegExpCompiler* compiler, |
| int filled_in, |
| bool not_at_start); |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| virtual int ComputeFirstCharacterSet(int budget); |
| virtual AssertionNode* Clone() { return new AssertionNode(*this); } |
| AssertionNodeType type() { return type_; } |
| @@ -961,6 +1040,12 @@ |
| bool not_at_start) { |
| return; |
| } |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| + // Working out the set of characters that a backreference can match is too |
| + // hard, so we just say that any character can match. |
| + bm->SetRest(offset); |
| + } |
| virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } |
| virtual int ComputeFirstCharacterSet(int budget); |
| @@ -986,6 +1071,11 @@ |
| // Returning 0 from EatsAtLeast should ensure we never get here. |
| UNREACHABLE(); |
| } |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| + // Returning 0 from EatsAtLeast should ensure we never get here. |
| + UNREACHABLE(); |
| + } |
| virtual EndNode* Clone() { return new EndNode(*this); } |
| private: |
| Action action_; |
| @@ -1071,6 +1161,8 @@ |
| RegExpCompiler* compiler, |
| int characters_filled_in, |
| bool not_at_start); |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } |
| bool being_calculated() { return being_calculated_; } |
| @@ -1089,7 +1181,7 @@ |
| void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| Guard* guard, |
| Trace* trace); |
| - int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start); |
| + int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least); |
| void EmitOutOfLineContinuation(RegExpCompiler* compiler, |
| Trace* trace, |
| GuardedAlternative alternative, |
| @@ -1119,6 +1211,10 @@ |
| RegExpCompiler* compiler, |
| int characters_filled_in, |
| bool not_at_start); |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| + alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); |
| + } |
| // For a negative lookahead we don't emit the quick check for the |
| // alternative that is expected to fail. This is because quick check code |
| // starts by loading enough characters for the alternative that takes fewest |
| @@ -1146,6 +1242,8 @@ |
| RegExpCompiler* compiler, |
| int characters_filled_in, |
| bool not_at_start); |
| + virtual void FillInBMInfo( |
| + int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| virtual int ComputeFirstCharacterSet(int budget); |
| virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } |
| RegExpNode* loop_node() { return loop_node_; } |
| @@ -1458,6 +1556,7 @@ |
| bool ignore_case, |
| bool multiline, |
| Handle<String> pattern, |
| + Handle<String> sample_subject, |
| bool is_ascii); |
| static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |