Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(150)

Side by Side Diff: src/jsregexp.h

Issue 10831126: Take advantage of batched results when matching global regexp. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/isolate.h ('k') | src/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
86 Handle<String> pattern, 86 Handle<String> pattern,
87 JSRegExp::Flags flags, 87 JSRegExp::Flags flags,
88 int capture_register_count); 88 int capture_register_count);
89 89
90 90
91 static void AtomCompile(Handle<JSRegExp> re, 91 static void AtomCompile(Handle<JSRegExp> re,
92 Handle<String> pattern, 92 Handle<String> pattern,
93 JSRegExp::Flags flags, 93 JSRegExp::Flags flags,
94 Handle<String> match_pattern); 94 Handle<String> match_pattern);
95 95
96
97 static int AtomExecRaw(Handle<JSRegExp> regexp,
98 Handle<String> subject,
99 int index,
100 int32_t* output,
101 int output_size);
102
103
96 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, 104 static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
97 Handle<String> subject, 105 Handle<String> subject,
98 int index, 106 int index,
99 Handle<JSArray> lastMatchInfo); 107 Handle<JSArray> lastMatchInfo);
100 108
101 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; 109 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
102 110
103 // Prepare a RegExp for being executed one or more times (using 111 // Prepare a RegExp for being executed one or more times (using
104 // IrregexpExecOnce) on the subject. 112 // IrregexpExecOnce) on the subject.
105 // This ensures that the regexp is compiled for the subject, and that 113 // This ensures that the regexp is compiled for the subject, and that
106 // the subject is flat. 114 // the subject is flat.
107 // Returns the number of integer spaces required by IrregexpExecOnce 115 // Returns the number of integer spaces required by IrregexpExecOnce
108 // as its "registers" argument. If the regexp cannot be compiled, 116 // as its "registers" argument. If the regexp cannot be compiled,
109 // an exception is set as pending, and this function returns negative. 117 // an exception is set as pending, and this function returns negative.
110 static int IrregexpPrepare(Handle<JSRegExp> regexp, 118 static int IrregexpPrepare(Handle<JSRegExp> regexp,
111 Handle<String> subject); 119 Handle<String> subject);
112 120
113 // Calculate the size of offsets vector for the case of global regexp 121 // Calculate the size of offsets vector for the case of global regexp
114 // and the number of matches this vector is able to store. 122 // and the number of matches this vector is able to store.
115 static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, 123 static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
ulan 2012/08/03 07:44:27 Looks like this function is not used anymore.
Yang 2012/08/03 11:37:42 Done.
116 int registers_per_match, 124 int registers_per_match,
117 int* max_matches); 125 int* max_matches);
118 126
119 // Execute a regular expression on the subject, starting from index. 127 // Execute a regular expression on the subject, starting from index.
120 // If matching succeeds, return the number of matches. This can be larger 128 // If matching succeeds, return the number of matches. This can be larger
121 // than one in the case of global regular expressions. 129 // than one in the case of global regular expressions.
122 // The captures and subcaptures are stored into the registers vector. 130 // The captures and subcaptures are stored into the registers vector.
123 // If matching fails, returns RE_FAILURE. 131 // If matching fails, returns RE_FAILURE.
124 // If execution fails, sets a pending exception and returns RE_EXCEPTION. 132 // If execution fails, sets a pending exception and returns RE_EXCEPTION.
125 static int IrregexpExecRaw(Handle<JSRegExp> regexp, 133 static int IrregexpExecRaw(Handle<JSRegExp> regexp,
126 Handle<String> subject, 134 Handle<String> subject,
127 int index, 135 int index,
128 Vector<int> registers); 136 int32_t* output,
137 int output_size);
129 138
130 // Execute an Irregexp bytecode pattern. 139 // Execute an Irregexp bytecode pattern.
131 // On a successful match, the result is a JSArray containing 140 // On a successful match, the result is a JSArray containing
132 // captured positions. On a failure, the result is the null value. 141 // captured positions. On a failure, the result is the null value.
133 // Returns an empty handle in case of an exception. 142 // Returns an empty handle in case of an exception.
134 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, 143 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
135 Handle<String> subject, 144 Handle<String> subject,
136 int index, 145 int index,
137 Handle<JSArray> lastMatchInfo); 146 Handle<JSArray> lastMatchInfo);
138 147
148 // Set last match info. If match is NULL, then setting captures is omitted.
ulan 2012/08/03 07:44:27 Double spaces in the comments for the functions be
Yang 2012/08/03 11:37:42 Done.
149 static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
150 Handle<String> subject,
151 int capture_count,
152 int32_t* match);
153
154 // Initialize a run of global regexp matching. Return either
155 // RE_SUCCESS or RE_EXCEPTION.
156 static int GlobalCacheInitialize(Handle<JSRegExp> regexp,
157 Handle<String> subject,
158 bool is_global,
159 Isolate* isolate);
160
161 // Fetch the next entry in the cache for global regexp match results.
162 // This does not set the last match info. Upon failure, NULL is returned.
163 // The cause can be checked with GlobalCacheResult(). The previous
164 // result is still in available in memory when a failure happens.
165 INLINE(static int32_t* GlobalCacheFetchNext(Handle<JSRegExp> regexp,
ulan 2012/08/03 07:44:27 Can we put this function in .cc file, it seems to
Yang 2012/08/03 11:37:42 Done.
166 Handle<String> subject)) {
167 global_cache_current_match_index_++;
168 // The cache must not be clobbered.
ulan 2012/08/03 07:44:27 Stale comment?
Yang 2012/08/03 11:37:42 Done.
169 ASSERT(global_cache_register_array_ != NULL);
170 if (global_cache_current_match_index_ >= global_cache_num_matches_) {
171 // Current batch of results exhausted.
172 // Fail if last batch was not even fully filled.
173 if (global_cache_num_matches_ < global_cache_max_matches_) {
174 global_cache_num_matches_ = 0;
175 return NULL;
176 }
177
178 int32_t* last_match =
179 &global_cache_register_array_[global_cache_register_array_size_ -
180 global_cache_registers_per_match_];
181 int last_end_index = last_match[1];
182
183 if (global_cache_current_type_ == JSRegExp::ATOM) {
184 global_cache_num_matches_ =
185 RegExpImpl::AtomExecRaw(regexp,
186 subject,
187 last_end_index,
188 global_cache_register_array_,
189 global_cache_register_array_size_);
190 } else {
191 int last_start_index = last_match[0];
192 if (last_start_index == last_end_index) last_end_index++;
193 if (last_end_index > subject->length()) {
194 global_cache_num_matches_ = 0;
195 return NULL;
196 }
197 global_cache_num_matches_ =
198 RegExpImpl::IrregexpExecRaw(regexp,
199 subject,
200 last_end_index,
201 global_cache_register_array_,
202 global_cache_register_array_size_);
203 }
204
205 if (global_cache_num_matches_ <= 0) return NULL;
206 global_cache_current_match_index_ = 0;
207 return global_cache_register_array_;
208 } else {
209 return &global_cache_register_array_[
210 global_cache_current_match_index_ *
211 global_cache_registers_per_match_];
212 }
213 }
214
215 INLINE(static IrregexpResult GlobalCacheResult()) {
216 if (global_cache_num_matches_ > 0) return RE_SUCCESS;
217 if (global_cache_num_matches_ == 0) return RE_FAILURE;
218 return RE_EXCEPTION;
219 }
220
221
139 // Array index in the lastMatchInfo array. 222 // Array index in the lastMatchInfo array.
140 static const int kLastCaptureCount = 0; 223 static const int kLastCaptureCount = 0;
141 static const int kLastSubject = 1; 224 static const int kLastSubject = 1;
142 static const int kLastInput = 2; 225 static const int kLastInput = 2;
143 static const int kFirstCapture = 3; 226 static const int kFirstCapture = 3;
144 static const int kLastMatchOverhead = 3; 227 static const int kLastMatchOverhead = 3;
145 228
146 // Direct offset into the lastMatchInfo array. 229 // Direct offset into the lastMatchInfo array.
147 static const int kLastCaptureCountOffset = 230 static const int kLastCaptureCountOffset =
148 FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize; 231 FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize;
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
188 271
189 // Limit the space regexps take up on the heap. In order to limit this we 272 // Limit the space regexps take up on the heap. In order to limit this we
190 // would like to keep track of the amount of regexp code on the heap. This 273 // would like to keep track of the amount of regexp code on the heap. This
191 // is not tracked, however. As a conservative approximation we track the 274 // is not tracked, however. As a conservative approximation we track the
192 // total regexp code compiled including code that has subsequently been freed 275 // total regexp code compiled including code that has subsequently been freed
193 // and the total executable memory at any point. 276 // and the total executable memory at any point.
194 static const int kRegExpExecutableMemoryLimit = 16 * MB; 277 static const int kRegExpExecutableMemoryLimit = 16 * MB;
195 static const int kRegWxpCompiledLimit = 1 * MB; 278 static const int kRegWxpCompiledLimit = 1 * MB;
196 279
197 private: 280 private:
198 static String* last_ascii_string_;
199 static String* two_byte_cached_string_;
200
201 static bool CompileIrregexp( 281 static bool CompileIrregexp(
202 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); 282 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
203 static inline bool EnsureCompiledIrregexp( 283 static inline bool EnsureCompiledIrregexp(
204 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); 284 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
205 285
206 286 // State of the global cache.
207 // Set the subject cache. The previous string buffer is not deleted, so the 287 static int global_cache_num_matches_;
208 // caller should ensure that it doesn't leak. 288 static int global_cache_max_matches_;
209 static void SetSubjectCache(String* subject, 289 static int global_cache_current_match_index_;
210 char* utf8_subject, 290 static int global_cache_registers_per_match_;
211 int uft8_length, 291 // Pointer to the last set of captures.
212 int character_position, 292 static JSRegExp::Type global_cache_current_type_;
213 int utf8_position); 293 static int32_t* global_cache_register_array_;
214 294 static int global_cache_register_array_size_;
215 // A one element cache of the last utf8_subject string and its length. The
216 // subject JS String object is cached in the heap. We also cache a
217 // translation between position and utf8 position.
218 static char* utf8_subject_cache_;
219 static int utf8_length_cache_;
220 static int utf8_position_;
221 static int character_position_;
222 }; 295 };
223 296
224 297
225 // Represents the location of one element relative to the intersection of 298 // Represents the location of one element relative to the intersection of
226 // two sets. Corresponds to the four areas of a Venn diagram. 299 // two sets. Corresponds to the four areas of a Venn diagram.
227 enum ElementInSetsRelation { 300 enum ElementInSetsRelation {
228 kInsideNone = 0, 301 kInsideNone = 0,
229 kInsideFirst = 1, 302 kInsideFirst = 1,
230 kInsideSecond = 2, 303 kInsideSecond = 2,
231 kInsideBoth = 3 304 kInsideBoth = 3
(...skipping 1383 matching lines...) Expand 10 before | Expand all | Expand 10 after
1615 bool global, 1688 bool global,
1616 bool multiline, 1689 bool multiline,
1617 Handle<String> pattern, 1690 Handle<String> pattern,
1618 Handle<String> sample_subject, 1691 Handle<String> sample_subject,
1619 bool is_ascii, Zone* zone); 1692 bool is_ascii, Zone* zone);
1620 1693
1621 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1694 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1622 }; 1695 };
1623 1696
1624 1697
1625 class OffsetsVector {
1626 public:
1627 inline OffsetsVector(int num_registers, Isolate* isolate)
1628 : offsets_vector_length_(num_registers) {
1629 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
1630 vector_ = NewArray<int>(offsets_vector_length_);
1631 } else {
1632 vector_ = isolate->jsregexp_static_offsets_vector();
1633 }
1634 }
1635 inline ~OffsetsVector() {
1636 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
1637 DeleteArray(vector_);
1638 vector_ = NULL;
1639 }
1640 }
1641 inline int* vector() { return vector_; }
1642 inline int length() { return offsets_vector_length_; }
1643
1644 static const int kStaticOffsetsVectorSize =
1645 Isolate::kJSRegexpStaticOffsetsVectorSize;
1646
1647 private:
1648 static Address static_offsets_vector_address(Isolate* isolate) {
1649 return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
1650 }
1651
1652 int* vector_;
1653 int offsets_vector_length_;
1654
1655 friend class ExternalReference;
1656 };
1657
1658
1659 } } // namespace v8::internal 1698 } } // namespace v8::internal
1660 1699
1661 #endif // V8_JSREGEXP_H_ 1700 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/isolate.h ('k') | src/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698