OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 Handle<String> pattern, | 86 Handle<String> pattern, |
87 JSRegExp::Flags flags, | 87 JSRegExp::Flags flags, |
88 int capture_register_count); | 88 int capture_register_count); |
89 | 89 |
90 | 90 |
91 static void AtomCompile(Handle<JSRegExp> re, | 91 static void AtomCompile(Handle<JSRegExp> re, |
92 Handle<String> pattern, | 92 Handle<String> pattern, |
93 JSRegExp::Flags flags, | 93 JSRegExp::Flags flags, |
94 Handle<String> match_pattern); | 94 Handle<String> match_pattern); |
95 | 95 |
| 96 |
| 97 static int AtomExecRaw(Handle<JSRegExp> regexp, |
| 98 Handle<String> subject, |
| 99 int index, |
| 100 int32_t* output, |
| 101 int output_size); |
| 102 |
| 103 |
96 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, | 104 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, |
97 Handle<String> subject, | 105 Handle<String> subject, |
98 int index, | 106 int index, |
99 Handle<JSArray> lastMatchInfo); | 107 Handle<JSArray> lastMatchInfo); |
100 | 108 |
101 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; | 109 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; |
102 | 110 |
103 // Prepare a RegExp for being executed one or more times (using | 111 // Prepare a RegExp for being executed one or more times (using |
104 // IrregexpExecOnce) on the subject. | 112 // IrregexpExecOnce) on the subject. |
105 // This ensures that the regexp is compiled for the subject, and that | 113 // This ensures that the regexp is compiled for the subject, and that |
106 // the subject is flat. | 114 // the subject is flat. |
107 // Returns the number of integer spaces required by IrregexpExecOnce | 115 // Returns the number of integer spaces required by IrregexpExecOnce |
108 // as its "registers" argument. If the regexp cannot be compiled, | 116 // as its "registers" argument. If the regexp cannot be compiled, |
109 // an exception is set as pending, and this function returns negative. | 117 // an exception is set as pending, and this function returns negative. |
110 static int IrregexpPrepare(Handle<JSRegExp> regexp, | 118 static int IrregexpPrepare(Handle<JSRegExp> regexp, |
111 Handle<String> subject); | 119 Handle<String> subject); |
112 | 120 |
113 // Calculate the size of offsets vector for the case of global regexp | |
114 // and the number of matches this vector is able to store. | |
115 static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, | |
116 int registers_per_match, | |
117 int* max_matches); | |
118 | |
119 // Execute a regular expression on the subject, starting from index. | 121 // Execute a regular expression on the subject, starting from index. |
120 // If matching succeeds, return the number of matches. This can be larger | 122 // If matching succeeds, return the number of matches. This can be larger |
121 // than one in the case of global regular expressions. | 123 // than one in the case of global regular expressions. |
122 // The captures and subcaptures are stored into the registers vector. | 124 // The captures and subcaptures are stored into the registers vector. |
123 // If matching fails, returns RE_FAILURE. | 125 // If matching fails, returns RE_FAILURE. |
124 // If execution fails, sets a pending exception and returns RE_EXCEPTION. | 126 // If execution fails, sets a pending exception and returns RE_EXCEPTION. |
125 static int IrregexpExecRaw(Handle<JSRegExp> regexp, | 127 static int IrregexpExecRaw(Handle<JSRegExp> regexp, |
126 Handle<String> subject, | 128 Handle<String> subject, |
127 int index, | 129 int index, |
128 Vector<int> registers); | 130 int32_t* output, |
| 131 int output_size); |
129 | 132 |
130 // Execute an Irregexp bytecode pattern. | 133 // Execute an Irregexp bytecode pattern. |
131 // On a successful match, the result is a JSArray containing | 134 // On a successful match, the result is a JSArray containing |
132 // captured positions. On a failure, the result is the null value. | 135 // captured positions. On a failure, the result is the null value. |
133 // Returns an empty handle in case of an exception. | 136 // Returns an empty handle in case of an exception. |
134 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, | 137 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, |
135 Handle<String> subject, | 138 Handle<String> subject, |
136 int index, | 139 int index, |
137 Handle<JSArray> lastMatchInfo); | 140 Handle<JSArray> lastMatchInfo); |
138 | 141 |
| 142 // Set last match info. If match is NULL, then setting captures is omitted. |
| 143 static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info, |
| 144 Handle<String> subject, |
| 145 int capture_count, |
| 146 int32_t* match); |
| 147 |
| 148 |
| 149 class GlobalCache { |
| 150 public: |
| 151 GlobalCache(Handle<JSRegExp> regexp, |
| 152 Handle<String> subject, |
| 153 bool is_global, |
| 154 Isolate* isolate); |
| 155 |
| 156 ~GlobalCache(); |
| 157 |
| 158 // Fetch the next entry in the cache for global regexp match results. |
| 159 // This does not set the last match info. Upon failure, NULL is returned. |
| 160 // The cause can be checked with Result(). The previous |
| 161 // result is still available in memory when a failure happens. |
| 162 int32_t* FetchNext(); |
| 163 |
| 164 int32_t* LastSuccessfulMatch(); |
| 165 |
| 166 inline bool HasException() { return num_matches_ < 0; } |
| 167 |
| 168 private: |
| 169 int num_matches_; |
| 170 int max_matches_; |
| 171 int current_match_index_; |
| 172 int registers_per_match_; |
| 173 // Pointer to the last set of captures. |
| 174 int32_t* register_array_; |
| 175 int register_array_size_; |
| 176 Handle<JSRegExp> regexp_; |
| 177 Handle<String> subject_; |
| 178 }; |
| 179 |
| 180 |
| 181 |
139 // Array index in the lastMatchInfo array. | 182 // Array index in the lastMatchInfo array. |
140 static const int kLastCaptureCount = 0; | 183 static const int kLastCaptureCount = 0; |
141 static const int kLastSubject = 1; | 184 static const int kLastSubject = 1; |
142 static const int kLastInput = 2; | 185 static const int kLastInput = 2; |
143 static const int kFirstCapture = 3; | 186 static const int kFirstCapture = 3; |
144 static const int kLastMatchOverhead = 3; | 187 static const int kLastMatchOverhead = 3; |
145 | 188 |
146 // Direct offset into the lastMatchInfo array. | 189 // Direct offset into the lastMatchInfo array. |
147 static const int kLastCaptureCountOffset = | 190 static const int kLastCaptureCountOffset = |
148 FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize; | 191 FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize; |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
188 | 231 |
189 // Limit the space regexps take up on the heap. In order to limit this we | 232 // Limit the space regexps take up on the heap. In order to limit this we |
190 // would like to keep track of the amount of regexp code on the heap. This | 233 // would like to keep track of the amount of regexp code on the heap. This |
191 // is not tracked, however. As a conservative approximation we track the | 234 // is not tracked, however. As a conservative approximation we track the |
192 // total regexp code compiled including code that has subsequently been freed | 235 // total regexp code compiled including code that has subsequently been freed |
193 // and the total executable memory at any point. | 236 // and the total executable memory at any point. |
194 static const int kRegExpExecutableMemoryLimit = 16 * MB; | 237 static const int kRegExpExecutableMemoryLimit = 16 * MB; |
195 static const int kRegWxpCompiledLimit = 1 * MB; | 238 static const int kRegWxpCompiledLimit = 1 * MB; |
196 | 239 |
197 private: | 240 private: |
198 static String* last_ascii_string_; | |
199 static String* two_byte_cached_string_; | |
200 | |
201 static bool CompileIrregexp( | 241 static bool CompileIrregexp( |
202 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 242 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
203 static inline bool EnsureCompiledIrregexp( | 243 static inline bool EnsureCompiledIrregexp( |
204 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 244 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
205 | |
206 | |
207 // Set the subject cache. The previous string buffer is not deleted, so the | |
208 // caller should ensure that it doesn't leak. | |
209 static void SetSubjectCache(String* subject, | |
210 char* utf8_subject, | |
211 int uft8_length, | |
212 int character_position, | |
213 int utf8_position); | |
214 | |
215 // A one element cache of the last utf8_subject string and its length. The | |
216 // subject JS String object is cached in the heap. We also cache a | |
217 // translation between position and utf8 position. | |
218 static char* utf8_subject_cache_; | |
219 static int utf8_length_cache_; | |
220 static int utf8_position_; | |
221 static int character_position_; | |
222 }; | 245 }; |
223 | 246 |
224 | 247 |
225 // Represents the location of one element relative to the intersection of | 248 // Represents the location of one element relative to the intersection of |
226 // two sets. Corresponds to the four areas of a Venn diagram. | 249 // two sets. Corresponds to the four areas of a Venn diagram. |
227 enum ElementInSetsRelation { | 250 enum ElementInSetsRelation { |
228 kInsideNone = 0, | 251 kInsideNone = 0, |
229 kInsideFirst = 1, | 252 kInsideFirst = 1, |
230 kInsideSecond = 2, | 253 kInsideSecond = 2, |
231 kInsideBoth = 3 | 254 kInsideBoth = 3 |
(...skipping 1383 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1615 bool global, | 1638 bool global, |
1616 bool multiline, | 1639 bool multiline, |
1617 Handle<String> pattern, | 1640 Handle<String> pattern, |
1618 Handle<String> sample_subject, | 1641 Handle<String> sample_subject, |
1619 bool is_ascii, Zone* zone); | 1642 bool is_ascii, Zone* zone); |
1620 | 1643 |
1621 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1644 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1622 }; | 1645 }; |
1623 | 1646 |
1624 | 1647 |
1625 class OffsetsVector { | |
1626 public: | |
1627 inline OffsetsVector(int num_registers, Isolate* isolate) | |
1628 : offsets_vector_length_(num_registers) { | |
1629 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
1630 vector_ = NewArray<int>(offsets_vector_length_); | |
1631 } else { | |
1632 vector_ = isolate->jsregexp_static_offsets_vector(); | |
1633 } | |
1634 } | |
1635 inline ~OffsetsVector() { | |
1636 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
1637 DeleteArray(vector_); | |
1638 vector_ = NULL; | |
1639 } | |
1640 } | |
1641 inline int* vector() { return vector_; } | |
1642 inline int length() { return offsets_vector_length_; } | |
1643 | |
1644 static const int kStaticOffsetsVectorSize = | |
1645 Isolate::kJSRegexpStaticOffsetsVectorSize; | |
1646 | |
1647 private: | |
1648 static Address static_offsets_vector_address(Isolate* isolate) { | |
1649 return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()); | |
1650 } | |
1651 | |
1652 int* vector_; | |
1653 int offsets_vector_length_; | |
1654 | |
1655 friend class ExternalReference; | |
1656 }; | |
1657 | |
1658 | |
1659 } } // namespace v8::internal | 1648 } } // namespace v8::internal |
1660 | 1649 |
1661 #endif // V8_JSREGEXP_H_ | 1650 #endif // V8_JSREGEXP_H_ |
OLD | NEW |