Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(41)

Side by Side Diff: src/jsregexp.cc

Issue 799403003: Limit code size generated for very large regexps (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Add assert to check constant in test matches implementation Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | test/cctest/test-heap.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/ast.h" 7 #include "src/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 1009 matching lines...) Expand 10 before | Expand all | Expand 10 after
1020 1020
1021 static const int kMaxRecursion = 100; 1021 static const int kMaxRecursion = 100;
1022 inline int recursion_depth() { return recursion_depth_; } 1022 inline int recursion_depth() { return recursion_depth_; }
1023 inline void IncrementRecursionDepth() { recursion_depth_++; } 1023 inline void IncrementRecursionDepth() { recursion_depth_++; }
1024 inline void DecrementRecursionDepth() { recursion_depth_--; } 1024 inline void DecrementRecursionDepth() { recursion_depth_--; }
1025 1025
1026 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 1026 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
1027 1027
1028 inline bool ignore_case() { return ignore_case_; } 1028 inline bool ignore_case() { return ignore_case_; }
1029 inline bool one_byte() { return one_byte_; } 1029 inline bool one_byte() { return one_byte_; }
1030 inline bool optimize() { return optimize_; }
1031 inline void set_optimize(bool value) { optimize_ = value; }
1030 FrequencyCollator* frequency_collator() { return &frequency_collator_; } 1032 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
1031 1033
1032 int current_expansion_factor() { return current_expansion_factor_; } 1034 int current_expansion_factor() { return current_expansion_factor_; }
1033 void set_current_expansion_factor(int value) { 1035 void set_current_expansion_factor(int value) {
1034 current_expansion_factor_ = value; 1036 current_expansion_factor_ = value;
1035 } 1037 }
1036 1038
1037 Zone* zone() const { return zone_; } 1039 Zone* zone() const { return zone_; }
1038 1040
1039 static const int kNoRegister = -1; 1041 static const int kNoRegister = -1;
1040 1042
1041 private: 1043 private:
1042 EndNode* accept_; 1044 EndNode* accept_;
1043 int next_register_; 1045 int next_register_;
1044 List<RegExpNode*>* work_list_; 1046 List<RegExpNode*>* work_list_;
1045 int recursion_depth_; 1047 int recursion_depth_;
1046 RegExpMacroAssembler* macro_assembler_; 1048 RegExpMacroAssembler* macro_assembler_;
1047 bool ignore_case_; 1049 bool ignore_case_;
1048 bool one_byte_; 1050 bool one_byte_;
1049 bool reg_exp_too_big_; 1051 bool reg_exp_too_big_;
1052 bool optimize_;
1050 int current_expansion_factor_; 1053 int current_expansion_factor_;
1051 FrequencyCollator frequency_collator_; 1054 FrequencyCollator frequency_collator_;
1052 Zone* zone_; 1055 Zone* zone_;
1053 }; 1056 };
1054 1057
1055 1058
1056 class RecursionCheck { 1059 class RecursionCheck {
1057 public: 1060 public:
1058 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1061 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1059 compiler->IncrementRecursionDepth(); 1062 compiler->IncrementRecursionDepth();
(...skipping 12 matching lines...) Expand all
1072 // Attempts to compile the regexp using an Irregexp code generator. Returns 1075 // Attempts to compile the regexp using an Irregexp code generator. Returns
1073 // a fixed array or a null handle depending on whether it succeeded. 1076 // a fixed array or a null handle depending on whether it succeeded.
1074 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, 1077 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,
1075 bool one_byte, Zone* zone) 1078 bool one_byte, Zone* zone)
1076 : next_register_(2 * (capture_count + 1)), 1079 : next_register_(2 * (capture_count + 1)),
1077 work_list_(NULL), 1080 work_list_(NULL),
1078 recursion_depth_(0), 1081 recursion_depth_(0),
1079 ignore_case_(ignore_case), 1082 ignore_case_(ignore_case),
1080 one_byte_(one_byte), 1083 one_byte_(one_byte),
1081 reg_exp_too_big_(false), 1084 reg_exp_too_big_(false),
1085 optimize_(FLAG_regexp_optimization),
1082 current_expansion_factor_(1), 1086 current_expansion_factor_(1),
1083 frequency_collator_(), 1087 frequency_collator_(),
1084 zone_(zone) { 1088 zone_(zone) {
1085 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); 1089 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
1086 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 1090 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1087 } 1091 }
1088 1092
1089 1093
1090 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 1094 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
1091 RegExpMacroAssembler* macro_assembler, 1095 RegExpMacroAssembler* macro_assembler,
1092 RegExpNode* start, 1096 RegExpNode* start,
1093 int capture_count, 1097 int capture_count,
1094 Handle<String> pattern) { 1098 Handle<String> pattern) {
1095 Heap* heap = pattern->GetHeap(); 1099 Heap* heap = pattern->GetHeap();
1096 1100
1097 bool use_slow_safe_regexp_compiler = false;
1098 if (heap->total_regexp_code_generated() >
1099 RegExpImpl::kRegWxpCompiledLimit &&
1100 heap->isolate()->memory_allocator()->SizeExecutable() >
1101 RegExpImpl::kRegExpExecutableMemoryLimit) {
1102 use_slow_safe_regexp_compiler = true;
1103 }
1104
1105 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
1106
1107 #ifdef DEBUG 1101 #ifdef DEBUG
1108 if (FLAG_trace_regexp_assembler) 1102 if (FLAG_trace_regexp_assembler)
1109 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); 1103 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
1110 else 1104 else
1111 #endif 1105 #endif
1112 macro_assembler_ = macro_assembler; 1106 macro_assembler_ = macro_assembler;
1113 1107
1114 List <RegExpNode*> work_list(0); 1108 List <RegExpNode*> work_list(0);
1115 work_list_ = &work_list; 1109 work_list_ = &work_list;
1116 Label fail; 1110 Label fail;
(...skipping 1133 matching lines...) Expand 10 before | Expand all | Expand 10 after
2250 return DONE; 2244 return DONE;
2251 } 2245 }
2252 // Generate generic version of the node and bind the label for later use. 2246 // Generate generic version of the node and bind the label for later use.
2253 macro_assembler->Bind(&label_); 2247 macro_assembler->Bind(&label_);
2254 return CONTINUE; 2248 return CONTINUE;
2255 } 2249 }
2256 2250
2257 // We are being asked to make a non-generic version. Keep track of how many 2251 // We are being asked to make a non-generic version. Keep track of how many
2258 // non-generic versions we generate so as not to overdo it. 2252 // non-generic versions we generate so as not to overdo it.
2259 trace_count_++; 2253 trace_count_++;
2260 if (FLAG_regexp_optimization && 2254 if (compiler->optimize() && trace_count_ < kMaxCopiesCodeGenerated &&
2261 trace_count_ < kMaxCopiesCodeGenerated &&
2262 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { 2255 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
2263 return CONTINUE; 2256 return CONTINUE;
2264 } 2257 }
2265 2258
2266 // If we get here code has been generated for this node too many times or 2259 // If we get here code has been generated for this node too many times or
2267 // recursion is too deep. Time to switch to a generic version. The code for 2260 // recursion is too deep. Time to switch to a generic version. The code for
2268 // generic versions above can handle deep recursion properly. 2261 // generic versions above can handle deep recursion properly.
2269 trace->Flush(compiler, this); 2262 trace->Flush(compiler, this);
2270 return DONE; 2263 return DONE;
2271 } 2264 }
(...skipping 1858 matching lines...) Expand 10 before | Expand all | Expand 10 after
4130 if (preload->preload_has_checked_bounds_) { 4123 if (preload->preload_has_checked_bounds_) {
4131 new_trace.set_bound_checked_up_to(preload->preload_characters_); 4124 new_trace.set_bound_checked_up_to(preload->preload_characters_);
4132 } 4125 }
4133 new_trace.quick_check_performed()->Clear(); 4126 new_trace.quick_check_performed()->Clear();
4134 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); 4127 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
4135 if (!is_last) { 4128 if (!is_last) {
4136 new_trace.set_backtrack(&alt_gen->after); 4129 new_trace.set_backtrack(&alt_gen->after);
4137 } 4130 }
4138 alt_gen->expects_preload = preload->preload_is_current_; 4131 alt_gen->expects_preload = preload->preload_is_current_;
4139 bool generate_full_check_inline = false; 4132 bool generate_full_check_inline = false;
4140 if (FLAG_regexp_optimization && 4133 if (compiler->optimize() &&
4141 try_to_emit_quick_check_for_alternative(i == 0) && 4134 try_to_emit_quick_check_for_alternative(i == 0) &&
4142 alternative.node()->EmitQuickCheck(compiler, 4135 alternative.node()->EmitQuickCheck(
4143 trace, 4136 compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
4144 &new_trace, 4137 &alt_gen->possible_success, &alt_gen->quick_check_details,
4145 preload->preload_has_checked_bounds_, 4138 fall_through_on_failure)) {
4146 &alt_gen->possible_success,
4147 &alt_gen->quick_check_details,
4148 fall_through_on_failure)) {
4149 // Quick check was generated for this choice. 4139 // Quick check was generated for this choice.
4150 preload->preload_is_current_ = true; 4140 preload->preload_is_current_ = true;
4151 preload->preload_has_checked_bounds_ = true; 4141 preload->preload_has_checked_bounds_ = true;
4152 // If we generated the quick check to fall through on possible success, 4142 // If we generated the quick check to fall through on possible success,
4153 // we now need to generate the full check inline. 4143 // we now need to generate the full check inline.
4154 if (!fall_through_on_failure) { 4144 if (!fall_through_on_failure) {
4155 macro_assembler->Bind(&alt_gen->possible_success); 4145 macro_assembler->Bind(&alt_gen->possible_success);
4156 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); 4146 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
4157 new_trace.set_characters_preloaded(preload->preload_characters_); 4147 new_trace.set_characters_preloaded(preload->preload_characters_);
4158 new_trace.set_bound_checked_up_to(preload->preload_characters_); 4148 new_trace.set_bound_checked_up_to(preload->preload_characters_);
(...skipping 777 matching lines...) Expand 10 before | Expand all | Expand 10 after
4936 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} 4926 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
4937 if (max == 0) return on_success; // This can happen due to recursion. 4927 if (max == 0) return on_success; // This can happen due to recursion.
4938 bool body_can_be_empty = (body->min_match() == 0); 4928 bool body_can_be_empty = (body->min_match() == 0);
4939 int body_start_reg = RegExpCompiler::kNoRegister; 4929 int body_start_reg = RegExpCompiler::kNoRegister;
4940 Interval capture_registers = body->CaptureRegisters(); 4930 Interval capture_registers = body->CaptureRegisters();
4941 bool needs_capture_clearing = !capture_registers.is_empty(); 4931 bool needs_capture_clearing = !capture_registers.is_empty();
4942 Zone* zone = compiler->zone(); 4932 Zone* zone = compiler->zone();
4943 4933
4944 if (body_can_be_empty) { 4934 if (body_can_be_empty) {
4945 body_start_reg = compiler->AllocateRegister(); 4935 body_start_reg = compiler->AllocateRegister();
4946 } else if (FLAG_regexp_optimization && !needs_capture_clearing) { 4936 } else if (compiler->optimize() && !needs_capture_clearing) {
4947 // Only unroll if there are no captures and the body can't be 4937 // Only unroll if there are no captures and the body can't be
4948 // empty. 4938 // empty.
4949 { 4939 {
4950 RegExpExpansionLimiter limiter( 4940 RegExpExpansionLimiter limiter(
4951 compiler, min + ((max != min) ? 1 : 0)); 4941 compiler, min + ((max != min) ? 1 : 0));
4952 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { 4942 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
4953 int new_max = (max == kInfinity) ? max : max - min; 4943 int new_max = (max == kInfinity) ? max : max - min;
4954 // Recurse once to get the loop or optional matches after the fixed 4944 // Recurse once to get the loop or optional matches after the fixed
4955 // ones. 4945 // ones.
4956 RegExpNode* answer = ToNode( 4946 RegExpNode* answer = ToNode(
(...skipping 1077 matching lines...) Expand 10 before | Expand all | Expand 10 after
6034 6024
6035 RegExpEngine::CompilationResult RegExpEngine::Compile( 6025 RegExpEngine::CompilationResult RegExpEngine::Compile(
6036 RegExpCompileData* data, bool ignore_case, bool is_global, 6026 RegExpCompileData* data, bool ignore_case, bool is_global,
6037 bool is_multiline, bool is_sticky, Handle<String> pattern, 6027 bool is_multiline, bool is_sticky, Handle<String> pattern,
6038 Handle<String> sample_subject, bool is_one_byte, Zone* zone) { 6028 Handle<String> sample_subject, bool is_one_byte, Zone* zone) {
6039 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 6029 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6040 return IrregexpRegExpTooBig(zone->isolate()); 6030 return IrregexpRegExpTooBig(zone->isolate());
6041 } 6031 }
6042 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone); 6032 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
6043 6033
6034 compiler.set_optimize(!TooMuchRegExpCode(pattern));
6035
6044 // Sample some characters from the middle of the string. 6036 // Sample some characters from the middle of the string.
6045 static const int kSampleSize = 128; 6037 static const int kSampleSize = 128;
6046 6038
6047 sample_subject = String::Flatten(sample_subject); 6039 sample_subject = String::Flatten(sample_subject);
6048 int chars_sampled = 0; 6040 int chars_sampled = 0;
6049 int half_way = (sample_subject->length() - kSampleSize) / 2; 6041 int half_way = (sample_subject->length() - kSampleSize) / 2;
6050 for (int i = Max(0, half_way); 6042 for (int i = Max(0, half_way);
6051 i < sample_subject->length() && chars_sampled < kSampleSize; 6043 i < sample_subject->length() && chars_sampled < kSampleSize;
6052 i++, chars_sampled++) { 6044 i++, chars_sampled++) {
6053 compiler.frequency_collator()->CountCharacter(sample_subject->Get(i)); 6045 compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
6136 #else 6128 #else
6137 #error "Unsupported architecture" 6129 #error "Unsupported architecture"
6138 #endif 6130 #endif
6139 6131
6140 #else // V8_INTERPRETED_REGEXP 6132 #else // V8_INTERPRETED_REGEXP
6141 // Interpreted regexp implementation. 6133 // Interpreted regexp implementation.
6142 EmbeddedVector<byte, 1024> codes; 6134 EmbeddedVector<byte, 1024> codes;
6143 RegExpMacroAssemblerIrregexp macro_assembler(codes, zone); 6135 RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
6144 #endif // V8_INTERPRETED_REGEXP 6136 #endif // V8_INTERPRETED_REGEXP
6145 6137
6138 macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern));
6139
6146 // Inserted here, instead of in Assembler, because it depends on information 6140 // Inserted here, instead of in Assembler, because it depends on information
6147 // in the AST that isn't replicated in the Node structure. 6141 // in the AST that isn't replicated in the Node structure.
6148 static const int kMaxBacksearchLimit = 1024; 6142 static const int kMaxBacksearchLimit = 1024;
6149 if (is_end_anchored && 6143 if (is_end_anchored &&
6150 !is_start_anchored && 6144 !is_start_anchored &&
6151 max_length < kMaxBacksearchLimit) { 6145 max_length < kMaxBacksearchLimit) {
6152 macro_assembler.SetCurrentPositionFromEnd(max_length); 6146 macro_assembler.SetCurrentPositionFromEnd(max_length);
6153 } 6147 }
6154 6148
6155 if (is_global) { 6149 if (is_global) {
6156 macro_assembler.set_global_mode( 6150 macro_assembler.set_global_mode(
6157 (data->tree->min_match() > 0) 6151 (data->tree->min_match() > 0)
6158 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK 6152 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
6159 : RegExpMacroAssembler::GLOBAL); 6153 : RegExpMacroAssembler::GLOBAL);
6160 } 6154 }
6161 6155
6162 return compiler.Assemble(&macro_assembler, 6156 return compiler.Assemble(&macro_assembler,
6163 node, 6157 node,
6164 data->capture_count, 6158 data->capture_count,
6165 pattern); 6159 pattern);
6166 } 6160 }
6167 6161
6168 6162
6163 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
6164 Heap* heap = pattern->GetHeap();
6165 bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
6166 if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit &&
6167 heap->isolate()->memory_allocator()->SizeExecutable() >
6168 RegExpImpl::kRegExpExecutableMemoryLimit) {
6169 too_much = true;
6170 }
6171 return too_much;
6172 }
6169 }} // namespace v8::internal 6173 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | test/cctest/test-heap.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698