Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/jsregexp.h

Issue 13343: More assertion propagation. (Closed)
Patch Set: "Does it lint?" Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/globals.h ('k') | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after
521 if (that->follows_start != UNKNOWN) { 521 if (that->follows_start != UNKNOWN) {
522 ASSERT(follows_start == UNKNOWN || 522 ASSERT(follows_start == UNKNOWN ||
523 follows_start == that->follows_start); 523 follows_start == that->follows_start);
524 follows_start = that->follows_start; 524 follows_start = that->follows_start;
525 } 525 }
526 does_determine_word = that->does_determine_word; 526 does_determine_word = that->does_determine_word;
527 does_determine_newline = that->does_determine_newline; 527 does_determine_newline = that->does_determine_newline;
528 does_determine_start = that->does_determine_start; 528 does_determine_start = that->does_determine_start;
529 } 529 }
530 530
531 bool HasLookbehind() {
532 return follows_word_interest ||
533 follows_newline_interest ||
534 follows_start_interest;
535 }
536
531 // Sets the interests of this node to include the interests of the 537 // Sets the interests of this node to include the interests of the
532 // following node. 538 // following node.
533 void AddFromFollowing(NodeInfo* that) { 539 void AddFromFollowing(NodeInfo* that) {
534 follows_word_interest |= that->follows_word_interest; 540 follows_word_interest |= that->follows_word_interest;
535 follows_newline_interest |= that->follows_newline_interest; 541 follows_newline_interest |= that->follows_newline_interest;
536 follows_start_interest |= that->follows_start_interest; 542 follows_start_interest |= that->follows_start_interest;
537 } 543 }
538 544
539 void ResetCompilationState() { 545 void ResetCompilationState() {
540 being_analyzed = false; 546 being_analyzed = false;
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after
887 893
888 bool being_calculated() { return being_calculated_; } 894 bool being_calculated() { return being_calculated_; }
889 void set_being_calculated(bool b) { being_calculated_ = b; } 895 void set_being_calculated(bool b) { being_calculated_ = b; }
890 896
891 protected: 897 protected:
892 int GreedyLoopTextLength(GuardedAlternative *alternative); 898 int GreedyLoopTextLength(GuardedAlternative *alternative);
893 ZoneList<GuardedAlternative>* alternatives_; 899 ZoneList<GuardedAlternative>* alternatives_;
894 900
895 private: 901 private:
896 friend class DispatchTableConstructor; 902 friend class DispatchTableConstructor;
897 friend class Analysis; 903 friend class AssertionPropagation;
898 void GenerateGuard(RegExpMacroAssembler* macro_assembler, 904 void GenerateGuard(RegExpMacroAssembler* macro_assembler,
899 Guard *guard, 905 Guard *guard,
900 GenerationVariant* variant); 906 GenerationVariant* variant);
901 DispatchTable* table_; 907 DispatchTable* table_;
902 bool being_calculated_; 908 bool being_calculated_;
903 }; 909 };
904 910
905 911
906 class LoopChoiceNode: public ChoiceNode { 912 class LoopChoiceNode: public ChoiceNode {
907 public: 913 public:
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
1045 DispatchTable* table() { return table_; } 1051 DispatchTable* table() { return table_; }
1046 void set_choice_index(int value) { choice_index_ = value; } 1052 void set_choice_index(int value) { choice_index_ = value; }
1047 1053
1048 protected: 1054 protected:
1049 DispatchTable *table_; 1055 DispatchTable *table_;
1050 int choice_index_; 1056 int choice_index_;
1051 bool ignore_case_; 1057 bool ignore_case_;
1052 }; 1058 };
1053 1059
1054 1060
1055 class Analysis: public NodeVisitor { 1061 // Assertion propagation moves information about assertions such as
1062 // \b to the affected nodes. For instance, in /.\b./ information must
1063 // be propagated to the first '.' that whatever follows needs to know
1064 // if it matched a word or a non-word, and to the second '.' that it
1065 // has to check if it succeeds a word or non-word. In this case the
1066 // result will be something like:
1067 //
1068 // +-------+ +------------+
1069 // | . | | . |
1070 // +-------+ ---> +------------+
1071 // | word? | | check word |
1072 // +-------+ +------------+
1073 //
1074 // At a later phase all nodes that determine information for their
1075 // following nodes are split into several 'sibling' nodes. In this
1076 // case the first '.' is split into one node that only matches words
1077 // and one that only matches non-words. The second '.' is also split,
1078 // into one node that assumes that the previous character was a word
1079 // character and one that assumes that is was non-word. In this case
1080 // the result is
1081 //
1082 // +------------------+ +------------------+
1083 // /--> | intersect(., \w) | ---> | intersect(., \W) |
1084 // | +------------------+ +------------------+
1085 // | | follows \w |
1086 // | +------------------+
1087 // --?
1088 // | +------------------+ +------------------+
1089 // \--> | intersect(., \W) | ---> | intersect(., \w) |
1090 // +------------------+ +------------------+
1091 // | follows \W |
1092 // +------------------+
1093 //
1094 // This way we don't need to explicitly check the previous character
1095 // but can always assume that whoever consumed the previous character
1096 // has propagated the relevant information forward.
1097 class AssertionPropagation: public NodeVisitor {
1056 public: 1098 public:
1057 explicit Analysis(bool ignore_case) 1099 explicit AssertionPropagation(bool ignore_case)
1058 : ignore_case_(ignore_case) { } 1100 : ignore_case_(ignore_case) { }
1059 void EnsureAnalyzed(RegExpNode* node); 1101 void EnsureAnalyzed(RegExpNode* node);
1060 1102
1061 #define DECLARE_VISIT(Type) \ 1103 #define DECLARE_VISIT(Type) \
1062 virtual void Visit##Type(Type##Node* that); 1104 virtual void Visit##Type(Type##Node* that);
1063 FOR_EACH_NODE_TYPE(DECLARE_VISIT) 1105 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1064 #undef DECLARE_VISIT 1106 #undef DECLARE_VISIT
1065 1107
1066 private: 1108 private:
1067 bool ignore_case_; 1109 bool ignore_case_;
1068 1110
1069 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); 1111 DISALLOW_IMPLICIT_CONSTRUCTORS(AssertionPropagation);
1070 }; 1112 };
1071 1113
1072 1114
1073 struct RegExpParseResult { 1115 struct RegExpCompileData {
1116 RegExpCompileData()
1117 : tree(NULL),
1118 node(NULL),
1119 has_lookbehind(false),
1120 has_character_escapes(false),
1121 capture_count(0) { }
1074 RegExpTree* tree; 1122 RegExpTree* tree;
1123 RegExpNode* node;
1124 bool has_lookbehind;
1075 bool has_character_escapes; 1125 bool has_character_escapes;
1076 Handle<String> error; 1126 Handle<String> error;
1077 int capture_count; 1127 int capture_count;
1078 }; 1128 };
1079 1129
1080 1130
1081 class RegExpEngine: public AllStatic { 1131 class RegExpEngine: public AllStatic {
1082 public: 1132 public:
1083 static Handle<FixedArray> Compile(RegExpParseResult* input, 1133 static Handle<FixedArray> Compile(RegExpCompileData* input,
1084 RegExpNode** node_return,
1085 bool ignore_case, 1134 bool ignore_case,
1086 bool multiline, 1135 bool multiline,
1087 Handle<String> pattern, 1136 Handle<String> pattern,
1088 bool is_ascii); 1137 bool is_ascii);
1089 1138
1090 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1139 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1091 }; 1140 };
1092 1141
1093 1142
1094 } } // namespace v8::internal 1143 } } // namespace v8::internal
1095 1144
1096 #endif // V8_JSREGEXP_H_ 1145 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/globals.h ('k') | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698