Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Side by Side Diff: src/jsregexp.cc

Issue 7348008: Merge up to 8597 to experimental/gc from the bleeding edge. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/gc/
Patch Set: '' Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/lithium.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
120 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); 120 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);
121 bool in_cache = !cached.is_null(); 121 bool in_cache = !cached.is_null();
122 LOG(isolate, RegExpCompileEvent(re, in_cache)); 122 LOG(isolate, RegExpCompileEvent(re, in_cache));
123 123
124 Handle<Object> result; 124 Handle<Object> result;
125 if (in_cache) { 125 if (in_cache) {
126 re->set_data(*cached); 126 re->set_data(*cached);
127 return re; 127 return re;
128 } 128 }
129 pattern = FlattenGetString(pattern); 129 pattern = FlattenGetString(pattern);
130 CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT); 130 ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
131 PostponeInterruptsScope postpone(isolate); 131 PostponeInterruptsScope postpone(isolate);
132 RegExpCompileData parse_result; 132 RegExpCompileData parse_result;
133 FlatStringReader reader(isolate, pattern); 133 FlatStringReader reader(isolate, pattern);
134 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 134 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
135 &parse_result)) { 135 &parse_result)) {
136 // Throw an exception if we fail to parse the pattern. 136 // Throw an exception if we fail to parse the pattern.
137 ThrowRegExpException(re, 137 ThrowRegExpException(re,
138 pattern, 138 pattern,
139 parse_result.error, 139 parse_result.error,
140 "malformed_regexp"); 140 "malformed_regexp");
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 // from the source pattern. 288 // from the source pattern.
289 // If compilation fails, an exception is thrown and this function 289 // If compilation fails, an exception is thrown and this function
290 // returns false. 290 // returns false.
291 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) { 291 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
292 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii)); 292 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
293 #ifdef V8_INTERPRETED_REGEXP 293 #ifdef V8_INTERPRETED_REGEXP
294 if (compiled_code->IsByteArray()) return true; 294 if (compiled_code->IsByteArray()) return true;
295 #else // V8_INTERPRETED_REGEXP (RegExp native code) 295 #else // V8_INTERPRETED_REGEXP (RegExp native code)
296 if (compiled_code->IsCode()) return true; 296 if (compiled_code->IsCode()) return true;
297 #endif 297 #endif
298 // We could potentially have marked this as flushable, but have kept
299 // a saved version if we did not flush it yet.
300 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
301 if (saved_code->IsCode()) {
302 // Reinstate the code in the original place.
303 re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
304 ASSERT(compiled_code->IsSmi());
305 return true;
306 }
298 return CompileIrregexp(re, is_ascii); 307 return CompileIrregexp(re, is_ascii);
299 } 308 }
300 309
301 310
311 static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
312 bool is_ascii,
313 Handle<String> error_message,
314 Isolate* isolate) {
315 Factory* factory = isolate->factory();
316 Handle<FixedArray> elements = factory->NewFixedArray(2);
317 elements->set(0, re->Pattern());
318 elements->set(1, *error_message);
319 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
320 Handle<Object> regexp_err =
321 factory->NewSyntaxError("malformed_regexp", array);
322 isolate->Throw(*regexp_err);
323 return false;
324 }
325
326
302 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { 327 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
303 // Compile the RegExp. 328 // Compile the RegExp.
304 Isolate* isolate = re->GetIsolate(); 329 Isolate* isolate = re->GetIsolate();
305 CompilationZoneScope zone_scope(isolate, DELETE_ON_EXIT); 330 ZoneScope zone_scope(isolate, DELETE_ON_EXIT);
306 PostponeInterruptsScope postpone(isolate); 331 PostponeInterruptsScope postpone(isolate);
332 // If we had a compilation error the last time this is saved at the
333 // saved code index.
307 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); 334 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
308 if (entry->IsJSObject()) { 335 // When arriving here entry can only be a smi, either representing an
309 // If it's a JSObject, a previous compilation failed and threw this object. 336 // uncompiled regexp, a previous compilation error, or code that has
310 // Re-throw the object without trying again. 337 // been flushed.
311 isolate->Throw(entry); 338 ASSERT(entry->IsSmi());
339 int entry_value = Smi::cast(entry)->value();
340 ASSERT(entry_value == JSRegExp::kUninitializedValue ||
341 entry_value == JSRegExp::kCompilationErrorValue ||
342 (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
343
344 if (entry_value == JSRegExp::kCompilationErrorValue) {
345 // A previous compilation failed and threw an error which we store in
346 // the saved code index (we store the error message, not the actual
347 // error). Recreate the error object and throw it.
348 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
349 ASSERT(error_string->IsString());
350 Handle<String> error_message(String::cast(error_string));
351 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
312 return false; 352 return false;
313 } 353 }
314 ASSERT(entry->IsTheHole());
315 354
316 JSRegExp::Flags flags = re->GetFlags(); 355 JSRegExp::Flags flags = re->GetFlags();
317 356
318 Handle<String> pattern(re->Pattern()); 357 Handle<String> pattern(re->Pattern());
319 if (!pattern->IsFlat()) { 358 if (!pattern->IsFlat()) {
320 FlattenString(pattern); 359 FlattenString(pattern);
321 } 360 }
322 361
323 RegExpCompileData compile_data; 362 RegExpCompileData compile_data;
324 FlatStringReader reader(isolate, pattern); 363 FlatStringReader reader(isolate, pattern);
325 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 364 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
326 &compile_data)) { 365 &compile_data)) {
327 // Throw an exception if we fail to parse the pattern. 366 // Throw an exception if we fail to parse the pattern.
328 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 367 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
329 ThrowRegExpException(re, 368 ThrowRegExpException(re,
330 pattern, 369 pattern,
331 compile_data.error, 370 compile_data.error,
332 "malformed_regexp"); 371 "malformed_regexp");
333 return false; 372 return false;
334 } 373 }
335 RegExpEngine::CompilationResult result = 374 RegExpEngine::CompilationResult result =
336 RegExpEngine::Compile(&compile_data, 375 RegExpEngine::Compile(&compile_data,
337 flags.is_ignore_case(), 376 flags.is_ignore_case(),
338 flags.is_multiline(), 377 flags.is_multiline(),
339 pattern, 378 pattern,
340 is_ascii); 379 is_ascii);
341 if (result.error_message != NULL) { 380 if (result.error_message != NULL) {
342 // Unable to compile regexp. 381 // Unable to compile regexp.
343 Factory* factory = isolate->factory();
344 Handle<FixedArray> elements = factory->NewFixedArray(2);
345 elements->set(0, *pattern);
346 Handle<String> error_message = 382 Handle<String> error_message =
347 factory->NewStringFromUtf8(CStrVector(result.error_message)); 383 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
348 elements->set(1, *error_message); 384 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
349 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
350 Handle<Object> regexp_err =
351 factory->NewSyntaxError("malformed_regexp", array);
352 isolate->Throw(*regexp_err);
353 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);
354 return false; 385 return false;
355 } 386 }
356 387
357 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); 388 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
358 data->set(JSRegExp::code_index(is_ascii), result.code); 389 data->set(JSRegExp::code_index(is_ascii), result.code);
359 int register_max = IrregexpMaxRegisterCount(*data); 390 int register_max = IrregexpMaxRegisterCount(*data);
360 if (result.num_registers > register_max) { 391 if (result.num_registers > register_max) {
361 SetIrregexpMaxRegisterCount(*data, result.num_registers); 392 SetIrregexpMaxRegisterCount(*data, result.num_registers);
362 } 393 }
363 394
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 484
454 // A flat ASCII string might have a two-byte first part. 485 // A flat ASCII string might have a two-byte first part.
455 if (subject->IsConsString()) { 486 if (subject->IsConsString()) {
456 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); 487 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
457 } 488 }
458 489
459 #ifndef V8_INTERPRETED_REGEXP 490 #ifndef V8_INTERPRETED_REGEXP
460 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 491 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
461 do { 492 do {
462 bool is_ascii = subject->IsAsciiRepresentation(); 493 bool is_ascii = subject->IsAsciiRepresentation();
494 EnsureCompiledIrregexp(regexp, is_ascii);
463 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); 495 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
464 NativeRegExpMacroAssembler::Result res = 496 NativeRegExpMacroAssembler::Result res =
465 NativeRegExpMacroAssembler::Match(code, 497 NativeRegExpMacroAssembler::Match(code,
466 subject, 498 subject,
467 output.start(), 499 output.start(),
468 output.length(), 500 output.length(),
469 index, 501 index,
470 isolate); 502 isolate);
471 if (res != NativeRegExpMacroAssembler::RETRY) { 503 if (res != NativeRegExpMacroAssembler::RETRY) {
472 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || 504 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
(...skipping 330 matching lines...) Expand 10 before | Expand all | Expand 10 after
803 static const int kMaxRecursion = 100; 835 static const int kMaxRecursion = 100;
804 inline int recursion_depth() { return recursion_depth_; } 836 inline int recursion_depth() { return recursion_depth_; }
805 inline void IncrementRecursionDepth() { recursion_depth_++; } 837 inline void IncrementRecursionDepth() { recursion_depth_++; }
806 inline void DecrementRecursionDepth() { recursion_depth_--; } 838 inline void DecrementRecursionDepth() { recursion_depth_--; }
807 839
808 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 840 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
809 841
810 inline bool ignore_case() { return ignore_case_; } 842 inline bool ignore_case() { return ignore_case_; }
811 inline bool ascii() { return ascii_; } 843 inline bool ascii() { return ascii_; }
812 844
845 int current_expansion_factor() { return current_expansion_factor_; }
846 void set_current_expansion_factor(int value) {
847 current_expansion_factor_ = value;
848 }
849
813 static const int kNoRegister = -1; 850 static const int kNoRegister = -1;
851
814 private: 852 private:
815 EndNode* accept_; 853 EndNode* accept_;
816 int next_register_; 854 int next_register_;
817 List<RegExpNode*>* work_list_; 855 List<RegExpNode*>* work_list_;
818 int recursion_depth_; 856 int recursion_depth_;
819 RegExpMacroAssembler* macro_assembler_; 857 RegExpMacroAssembler* macro_assembler_;
820 bool ignore_case_; 858 bool ignore_case_;
821 bool ascii_; 859 bool ascii_;
822 bool reg_exp_too_big_; 860 bool reg_exp_too_big_;
861 int current_expansion_factor_;
823 }; 862 };
824 863
825 864
826 class RecursionCheck { 865 class RecursionCheck {
827 public: 866 public:
828 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 867 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
829 compiler->IncrementRecursionDepth(); 868 compiler->IncrementRecursionDepth();
830 } 869 }
831 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 870 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
832 private: 871 private:
833 RegExpCompiler* compiler_; 872 RegExpCompiler* compiler_;
834 }; 873 };
835 874
836 875
837 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 876 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
838 return RegExpEngine::CompilationResult("RegExp too big"); 877 return RegExpEngine::CompilationResult("RegExp too big");
839 } 878 }
840 879
841 880
842 // Attempts to compile the regexp using an Irregexp code generator. Returns 881 // Attempts to compile the regexp using an Irregexp code generator. Returns
843 // a fixed array or a null handle depending on whether it succeeded. 882 // a fixed array or a null handle depending on whether it succeeded.
844 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) 883 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
845 : next_register_(2 * (capture_count + 1)), 884 : next_register_(2 * (capture_count + 1)),
846 work_list_(NULL), 885 work_list_(NULL),
847 recursion_depth_(0), 886 recursion_depth_(0),
848 ignore_case_(ignore_case), 887 ignore_case_(ignore_case),
849 ascii_(ascii), 888 ascii_(ascii),
850 reg_exp_too_big_(false) { 889 reg_exp_too_big_(false),
890 current_expansion_factor_(1) {
851 accept_ = new EndNode(EndNode::ACCEPT); 891 accept_ = new EndNode(EndNode::ACCEPT);
852 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 892 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
853 } 893 }
854 894
855 895
856 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 896 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
857 RegExpMacroAssembler* macro_assembler, 897 RegExpMacroAssembler* macro_assembler,
858 RegExpNode* start, 898 RegExpNode* start,
859 int capture_count, 899 int capture_count,
860 Handle<String> pattern) { 900 Handle<String> pattern) {
(...skipping 1899 matching lines...) Expand 10 before | Expand all | Expand 10 after
2760 ~AlternativeGenerationList() { 2800 ~AlternativeGenerationList() {
2761 for (int i = kAFew; i < alt_gens_.length(); i++) { 2801 for (int i = kAFew; i < alt_gens_.length(); i++) {
2762 delete alt_gens_[i]; 2802 delete alt_gens_[i];
2763 alt_gens_[i] = NULL; 2803 alt_gens_[i] = NULL;
2764 } 2804 }
2765 } 2805 }
2766 2806
2767 AlternativeGeneration* at(int i) { 2807 AlternativeGeneration* at(int i) {
2768 return alt_gens_[i]; 2808 return alt_gens_[i];
2769 } 2809 }
2810
2770 private: 2811 private:
2771 static const int kAFew = 10; 2812 static const int kAFew = 10;
2772 ZoneList<AlternativeGeneration*> alt_gens_; 2813 ZoneList<AlternativeGeneration*> alt_gens_;
2773 AlternativeGeneration a_few_alt_gens_[kAFew]; 2814 AlternativeGeneration a_few_alt_gens_[kAFew];
2774 }; 2815 };
2775 2816
2776 2817
2777 /* Code generation for choice nodes. 2818 /* Code generation for choice nodes.
2778 * 2819 *
2779 * We generate quick checks that do a mask and compare to eliminate a 2820 * We generate quick checks that do a mask and compare to eliminate a
(...skipping 539 matching lines...) Expand 10 before | Expand all | Expand 10 after
3319 int priority = 0; 3360 int priority = 0;
3320 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { 3361 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
3321 if (out_set->Get(i)) { 3362 if (out_set->Get(i)) {
3322 if (priority > 0) stream()->Add("|"); 3363 if (priority > 0) stream()->Add("|");
3323 stream()->Add("<s%io%i> %i", from, i, priority); 3364 stream()->Add("<s%io%i> %i", from, i, priority);
3324 priority++; 3365 priority++;
3325 } 3366 }
3326 } 3367 }
3327 stream()->Add("}}"); 3368 stream()->Add("}}");
3328 } 3369 }
3370
3329 private: 3371 private:
3330 bool first_; 3372 bool first_;
3331 StringStream* stream() { return stream_; } 3373 StringStream* stream() { return stream_; }
3332 StringStream* stream_; 3374 StringStream* stream_;
3333 }; 3375 };
3334 3376
3335 3377
3336 class AttributePrinter { 3378 class AttributePrinter {
3337 public: 3379 public:
3338 explicit AttributePrinter(DotPrinter* out) 3380 explicit AttributePrinter(DotPrinter* out)
(...skipping 381 matching lines...) Expand 10 before | Expand all | Expand 10 after
3720 RegExpNode* on_success) { 3762 RegExpNode* on_success) {
3721 return ToNode(min(), 3763 return ToNode(min(),
3722 max(), 3764 max(),
3723 is_greedy(), 3765 is_greedy(),
3724 body(), 3766 body(),
3725 compiler, 3767 compiler,
3726 on_success); 3768 on_success);
3727 } 3769 }
3728 3770
3729 3771
3772 // Scoped object to keep track of how much we unroll quantifier loops in the
3773 // regexp graph generator.
3774 class RegExpExpansionLimiter {
3775 public:
3776 static const int kMaxExpansionFactor = 6;
3777 RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
3778 : compiler_(compiler),
3779 saved_expansion_factor_(compiler->current_expansion_factor()),
3780 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
3781 ASSERT(factor > 0);
3782 if (ok_to_expand_) {
3783 if (factor > kMaxExpansionFactor) {
3784 // Avoid integer overflow of the current expansion factor.
3785 ok_to_expand_ = false;
3786 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
3787 } else {
3788 int new_factor = saved_expansion_factor_ * factor;
3789 ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
3790 compiler->set_current_expansion_factor(new_factor);
3791 }
3792 }
3793 }
3794
3795 ~RegExpExpansionLimiter() {
3796 compiler_->set_current_expansion_factor(saved_expansion_factor_);
3797 }
3798
3799 bool ok_to_expand() { return ok_to_expand_; }
3800
3801 private:
3802 RegExpCompiler* compiler_;
3803 int saved_expansion_factor_;
3804 bool ok_to_expand_;
3805
3806 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
3807 };
3808
3809
3730 RegExpNode* RegExpQuantifier::ToNode(int min, 3810 RegExpNode* RegExpQuantifier::ToNode(int min,
3731 int max, 3811 int max,
3732 bool is_greedy, 3812 bool is_greedy,
3733 RegExpTree* body, 3813 RegExpTree* body,
3734 RegExpCompiler* compiler, 3814 RegExpCompiler* compiler,
3735 RegExpNode* on_success, 3815 RegExpNode* on_success,
3736 bool not_at_start) { 3816 bool not_at_start) {
3737 // x{f, t} becomes this: 3817 // x{f, t} becomes this:
3738 // 3818 //
3739 // (r++)<-. 3819 // (r++)<-.
(...skipping 19 matching lines...) Expand all
3759 if (max == 0) return on_success; // This can happen due to recursion. 3839 if (max == 0) return on_success; // This can happen due to recursion.
3760 bool body_can_be_empty = (body->min_match() == 0); 3840 bool body_can_be_empty = (body->min_match() == 0);
3761 int body_start_reg = RegExpCompiler::kNoRegister; 3841 int body_start_reg = RegExpCompiler::kNoRegister;
3762 Interval capture_registers = body->CaptureRegisters(); 3842 Interval capture_registers = body->CaptureRegisters();
3763 bool needs_capture_clearing = !capture_registers.is_empty(); 3843 bool needs_capture_clearing = !capture_registers.is_empty();
3764 if (body_can_be_empty) { 3844 if (body_can_be_empty) {
3765 body_start_reg = compiler->AllocateRegister(); 3845 body_start_reg = compiler->AllocateRegister();
3766 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { 3846 } else if (FLAG_regexp_optimization && !needs_capture_clearing) {
3767 // Only unroll if there are no captures and the body can't be 3847 // Only unroll if there are no captures and the body can't be
3768 // empty. 3848 // empty.
3769 if (min > 0 && min <= kMaxUnrolledMinMatches) { 3849 {
3770 int new_max = (max == kInfinity) ? max : max - min; 3850 RegExpExpansionLimiter limiter(
3771 // Recurse once to get the loop or optional matches after the fixed ones. 3851 compiler, min + ((max != min) ? 1 : 0));
3772 RegExpNode* answer = ToNode( 3852 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
3773 0, new_max, is_greedy, body, compiler, on_success, true); 3853 int new_max = (max == kInfinity) ? max : max - min;
3774 // Unroll the forced matches from 0 to min. This can cause chains of 3854 // Recurse once to get the loop or optional matches after the fixed
3775 // TextNodes (which the parser does not generate). These should be 3855 // ones.
3776 // combined if it turns out they hinder good code generation. 3856 RegExpNode* answer = ToNode(
3777 for (int i = 0; i < min; i++) { 3857 0, new_max, is_greedy, body, compiler, on_success, true);
3778 answer = body->ToNode(compiler, answer); 3858 // Unroll the forced matches from 0 to min. This can cause chains of
3859 // TextNodes (which the parser does not generate). These should be
3860 // combined if it turns out they hinder good code generation.
3861 for (int i = 0; i < min; i++) {
3862 answer = body->ToNode(compiler, answer);
3863 }
3864 return answer;
3779 } 3865 }
3780 return answer;
3781 } 3866 }
3782 if (max <= kMaxUnrolledMaxMatches) { 3867 if (max <= kMaxUnrolledMaxMatches && min == 0) {
3783 ASSERT(min == 0); 3868 ASSERT(max > 0); // Due to the 'if' above.
3784 // Unroll the optional matches up to max. 3869 RegExpExpansionLimiter limiter(compiler, max);
3785 RegExpNode* answer = on_success; 3870 if (limiter.ok_to_expand()) {
3786 for (int i = 0; i < max; i++) { 3871 // Unroll the optional matches up to max.
3787 ChoiceNode* alternation = new ChoiceNode(2); 3872 RegExpNode* answer = on_success;
3788 if (is_greedy) { 3873 for (int i = 0; i < max; i++) {
3789 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, 3874 ChoiceNode* alternation = new ChoiceNode(2);
3790 answer))); 3875 if (is_greedy) {
3791 alternation->AddAlternative(GuardedAlternative(on_success)); 3876 alternation->AddAlternative(
3792 } else { 3877 GuardedAlternative(body->ToNode(compiler, answer)));
3793 alternation->AddAlternative(GuardedAlternative(on_success)); 3878 alternation->AddAlternative(GuardedAlternative(on_success));
3794 alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, 3879 } else {
3795 answer))); 3880 alternation->AddAlternative(GuardedAlternative(on_success));
3881 alternation->AddAlternative(
3882 GuardedAlternative(body->ToNode(compiler, answer)));
3883 }
3884 answer = alternation;
3885 if (not_at_start) alternation->set_not_at_start();
3796 } 3886 }
3797 answer = alternation; 3887 return answer;
3798 if (not_at_start) alternation->set_not_at_start();
3799 } 3888 }
3800 return answer;
3801 } 3889 }
3802 } 3890 }
3803 bool has_min = min > 0; 3891 bool has_min = min > 0;
3804 bool has_max = max < RegExpTree::kInfinity; 3892 bool has_max = max < RegExpTree::kInfinity;
3805 bool needs_counter = has_min || has_max; 3893 bool needs_counter = has_min || has_max;
3806 int reg_ctr = needs_counter 3894 int reg_ctr = needs_counter
3807 ? compiler->AllocateRegister() 3895 ? compiler->AllocateRegister()
3808 : RegExpCompiler::kNoRegister; 3896 : RegExpCompiler::kNoRegister;
3809 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); 3897 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
3810 if (not_at_start) center->set_not_at_start(); 3898 if (not_at_start) center->set_not_at_start();
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after
4116 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); 4204 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
4117 for (int i = 0; i < overlay.length(); i += 2) { 4205 for (int i = 0; i < overlay.length(); i += 2) {
4118 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), 4206 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
4119 CharacterRangeSplitter::kInOverlay); 4207 CharacterRangeSplitter::kInOverlay);
4120 } 4208 }
4121 CharacterRangeSplitter callback(included, excluded); 4209 CharacterRangeSplitter callback(included, excluded);
4122 table.ForEach(&callback); 4210 table.ForEach(&callback);
4123 } 4211 }
4124 4212
4125 4213
4126 static void AddUncanonicals(Isolate* isolate,
4127 ZoneList<CharacterRange>* ranges,
4128 int bottom,
4129 int top);
4130
4131
4132 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, 4214 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
4133 bool is_ascii) { 4215 bool is_ascii) {
4134 Isolate* isolate = Isolate::Current(); 4216 Isolate* isolate = Isolate::Current();
4135 uc16 bottom = from(); 4217 uc16 bottom = from();
4136 uc16 top = to(); 4218 uc16 top = to();
4137 if (is_ascii) { 4219 if (is_ascii) {
4138 if (bottom > String::kMaxAsciiCharCode) return; 4220 if (bottom > String::kMaxAsciiCharCode) return;
4139 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; 4221 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;
4140 } 4222 }
4141 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4223 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
4282 result.SetElementsInSecondSet(); 4364 result.SetElementsInSecondSet();
4283 } else if (j < range->length()) { 4365 } else if (j < range->length()) {
4284 // Argument range contains something not in word range. 4366 // Argument range contains something not in word range.
4285 result.SetElementsInFirstSet(); 4367 result.SetElementsInFirstSet();
4286 } 4368 }
4287 4369
4288 return result; 4370 return result;
4289 } 4371 }
4290 4372
4291 4373
4292 static void AddUncanonicals(Isolate* isolate,
4293 ZoneList<CharacterRange>* ranges,
4294 int bottom,
4295 int top) {
4296 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4297 // Zones with no case mappings. There is a DEBUG-mode loop to assert that
4298 // this table is correct.
4299 // 0x0600 - 0x0fff
4300 // 0x1100 - 0x1cff
4301 // 0x2000 - 0x20ff
4302 // 0x2200 - 0x23ff
4303 // 0x2500 - 0x2bff
4304 // 0x2e00 - 0xa5ff
4305 // 0xa800 - 0xfaff
4306 // 0xfc00 - 0xfeff
4307 const int boundary_count = 18;
4308 int boundaries[] = {
4309 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
4310 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
4311
4312 // Special ASCII rule from spec can save us some work here.
4313 if (bottom == 0x80 && top == 0xffff) return;
4314
4315 if (top <= boundaries[0]) {
4316 CharacterRange range(bottom, top);
4317 range.AddCaseEquivalents(ranges, false);
4318 return;
4319 }
4320
4321 // Split up very large ranges. This helps remove ranges where there are no
4322 // case mappings.
4323 for (int i = 0; i < boundary_count; i++) {
4324 if (bottom < boundaries[i] && top >= boundaries[i]) {
4325 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
4326 AddUncanonicals(isolate, ranges, boundaries[i], top);
4327 return;
4328 }
4329 }
4330
4331 // If we are completely in a zone with no case mappings then we are done.
4332 for (int i = 0; i < boundary_count; i += 2) {
4333 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
4334 #ifdef DEBUG
4335 for (int j = bottom; j <= top; j++) {
4336 unsigned current_char = j;
4337 int length = isolate->jsregexp_uncanonicalize()->get(current_char,
4338 '\0', chars);
4339 for (int k = 0; k < length; k++) {
4340 ASSERT(chars[k] == current_char);
4341 }
4342 }
4343 #endif
4344 return;
4345 }
4346 }
4347
4348 // Step through the range finding equivalent characters.
4349 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
4350 for (int i = bottom; i <= top; i++) {
4351 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
4352 for (int j = 0; j < length; j++) {
4353 uc32 chr = chars[j];
4354 if (chr != i && (chr < bottom || chr > top)) {
4355 characters->Add(chr);
4356 }
4357 }
4358 }
4359
4360 // Step through the equivalent characters finding simple ranges and
4361 // adding ranges to the character class.
4362 if (characters->length() > 0) {
4363 int new_from = characters->at(0);
4364 int new_to = new_from;
4365 for (int i = 1; i < characters->length(); i++) {
4366 int chr = characters->at(i);
4367 if (chr == new_to + 1) {
4368 new_to++;
4369 } else {
4370 if (new_to == new_from) {
4371 ranges->Add(CharacterRange::Singleton(new_from));
4372 } else {
4373 ranges->Add(CharacterRange(new_from, new_to));
4374 }
4375 new_from = new_to = chr;
4376 }
4377 }
4378 if (new_to == new_from) {
4379 ranges->Add(CharacterRange::Singleton(new_from));
4380 } else {
4381 ranges->Add(CharacterRange(new_from, new_to));
4382 }
4383 }
4384 }
4385
4386
4387 ZoneList<CharacterRange>* CharacterSet::ranges() { 4374 ZoneList<CharacterRange>* CharacterSet::ranges() {
4388 if (ranges_ == NULL) { 4375 if (ranges_ == NULL) {
4389 ranges_ = new ZoneList<CharacterRange>(2); 4376 ranges_ = new ZoneList<CharacterRange>(2);
4390 CharacterRange::AddClassEscape(standard_set_type_, ranges_); 4377 CharacterRange::AddClassEscape(standard_set_type_, ranges_);
4391 } 4378 }
4392 return ranges_; 4379 return ranges_;
4393 } 4380 }
4394 4381
4395 4382
4396 // Move a number of elements in a zonelist to another position 4383 // Move a number of elements in a zonelist to another position
(...skipping 979 matching lines...) Expand 10 before | Expand all | Expand 10 after
5376 } 5363 }
5377 5364
5378 return compiler.Assemble(&macro_assembler, 5365 return compiler.Assemble(&macro_assembler,
5379 node, 5366 node,
5380 data->capture_count, 5367 data->capture_count,
5381 pattern); 5368 pattern);
5382 } 5369 }
5383 5370
5384 5371
5385 }} // namespace v8::internal 5372 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/lithium.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698