Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(114)

Unified Diff: src/jsregexp.h

Issue 10831126: Take advantage of batched results when matching global regexp. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/isolate.h ('k') | src/jsregexp.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.h
diff --git a/src/jsregexp.h b/src/jsregexp.h
index 9a84237fde95ba829715e03d6947acd7444959e9..4a7e493dbe1994d15decaf3be005b5970fd55536 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -93,6 +93,14 @@ class RegExpImpl {
JSRegExp::Flags flags,
Handle<String> match_pattern);
+
+ static int AtomExecRaw(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ int index,
+ int32_t* output,
+ int output_size);
+
+
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
@@ -125,7 +133,8 @@ class RegExpImpl {
static int IrregexpExecRaw(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
- Vector<int> registers);
+ int32_t* output,
+ int output_size);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
@@ -136,6 +145,80 @@ class RegExpImpl {
int index,
Handle<JSArray> lastMatchInfo);
+ // Set last match info. If match is NULL, then setting captures is omitted.
ulan 2012/08/03 07:44:27 Double spaces in the comments for the functions be
Yang 2012/08/03 11:37:42 Done.
+ static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info,
+ Handle<String> subject,
+ int capture_count,
+ int32_t* match);
+
+ // Initialize a run of global regexp matching. Return either
+ // RE_SUCCESS or RE_EXCEPTION.
+ static int GlobalCacheInitialize(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ bool is_global,
+ Isolate* isolate);
+
+ // Fetch the next entry in the cache for global regexp match results.
+ // This does not set the last match info. Upon failure, NULL is returned.
+ // The cause can be checked with GlobalCacheResult(). The previous
+ // result is still available in memory when a failure happens.
+ INLINE(static int32_t* GlobalCacheFetchNext(Handle<JSRegExp> regexp,
ulan 2012/08/03 07:44:27 Can we put this function in .cc file, it seems to
Yang 2012/08/03 11:37:42 Done.
+ Handle<String> subject)) {
+ global_cache_current_match_index_++;
+ // The cache must not be clobbered.
ulan 2012/08/03 07:44:27 Stale comment?
Yang 2012/08/03 11:37:42 Done.
+ ASSERT(global_cache_register_array_ != NULL);
+ if (global_cache_current_match_index_ >= global_cache_num_matches_) {
+ // Current batch of results exhausted.
+ // Fail if last batch was not even fully filled.
+ if (global_cache_num_matches_ < global_cache_max_matches_) {
+ global_cache_num_matches_ = 0;
+ return NULL;
+ }
+
+ int32_t* last_match =
+ &global_cache_register_array_[global_cache_register_array_size_ -
+ global_cache_registers_per_match_];
+ int last_end_index = last_match[1];
+
+ if (global_cache_current_type_ == JSRegExp::ATOM) {
+ global_cache_num_matches_ =
+ RegExpImpl::AtomExecRaw(regexp,
+ subject,
+ last_end_index,
+ global_cache_register_array_,
+ global_cache_register_array_size_);
+ } else {
+ int last_start_index = last_match[0];
+ if (last_start_index == last_end_index) last_end_index++;
+ if (last_end_index > subject->length()) {
+ global_cache_num_matches_ = 0;
+ return NULL;
+ }
+ global_cache_num_matches_ =
+ RegExpImpl::IrregexpExecRaw(regexp,
+ subject,
+ last_end_index,
+ global_cache_register_array_,
+ global_cache_register_array_size_);
+ }
+
+ if (global_cache_num_matches_ <= 0) return NULL;
+ global_cache_current_match_index_ = 0;
+ return global_cache_register_array_;
+ } else {
+ return &global_cache_register_array_[
+ global_cache_current_match_index_ *
+ global_cache_registers_per_match_];
+ }
+ }
+
+ INLINE(static IrregexpResult GlobalCacheResult()) {
+ if (global_cache_num_matches_ > 0) return RE_SUCCESS;
+ if (global_cache_num_matches_ == 0) return RE_FAILURE;
+ return RE_EXCEPTION;
+ }
+
+
// Array index in the lastMatchInfo array.
static const int kLastCaptureCount = 0;
static const int kLastSubject = 1;
@@ -195,30 +278,20 @@ class RegExpImpl {
static const int kRegWxpCompiledLimit = 1 * MB;
private:
- static String* last_ascii_string_;
- static String* two_byte_cached_string_;
-
static bool CompileIrregexp(
Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
static inline bool EnsureCompiledIrregexp(
Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
-
- // Set the subject cache. The previous string buffer is not deleted, so the
- // caller should ensure that it doesn't leak.
- static void SetSubjectCache(String* subject,
- char* utf8_subject,
- int uft8_length,
- int character_position,
- int utf8_position);
-
- // A one element cache of the last utf8_subject string and its length. The
- // subject JS String object is cached in the heap. We also cache a
- // translation between position and utf8 position.
- static char* utf8_subject_cache_;
- static int utf8_length_cache_;
- static int utf8_position_;
- static int character_position_;
+ // State of the global cache.
+ static int global_cache_num_matches_;
+ static int global_cache_max_matches_;
+ static int global_cache_current_match_index_;
+ static int global_cache_registers_per_match_;
+ // Pointer to the last set of captures.
+ static JSRegExp::Type global_cache_current_type_;
+ static int32_t* global_cache_register_array_;
+ static int global_cache_register_array_size_;
};
@@ -1622,40 +1695,6 @@ class RegExpEngine: public AllStatic {
};
-class OffsetsVector {
- public:
- inline OffsetsVector(int num_registers, Isolate* isolate)
- : offsets_vector_length_(num_registers) {
- if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
- vector_ = NewArray<int>(offsets_vector_length_);
- } else {
- vector_ = isolate->jsregexp_static_offsets_vector();
- }
- }
- inline ~OffsetsVector() {
- if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
- DeleteArray(vector_);
- vector_ = NULL;
- }
- }
- inline int* vector() { return vector_; }
- inline int length() { return offsets_vector_length_; }
-
- static const int kStaticOffsetsVectorSize =
- Isolate::kJSRegexpStaticOffsetsVectorSize;
-
- private:
- static Address static_offsets_vector_address(Isolate* isolate) {
- return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
- }
-
- int* vector_;
- int offsets_vector_length_;
-
- friend class ExternalReference;
-};
-
-
} } // namespace v8::internal
#endif // V8_JSREGEXP_H_
« no previous file with comments | « src/isolate.h ('k') | src/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698