Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(114)

Side by Side Diff: src/jsregexp.cc

Issue 17409: Check for empty repetitions. (Closed)
Patch Set: Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1204 matching lines...) Expand 10 before | Expand all | Expand 10 after
1215 EndNode* accept() { return accept_; } 1215 EndNode* accept() { return accept_; }
1216 1216
1217 static const int kMaxRecursion = 100; 1217 static const int kMaxRecursion = 100;
1218 inline int recursion_depth() { return recursion_depth_; } 1218 inline int recursion_depth() { return recursion_depth_; }
1219 inline void IncrementRecursionDepth() { recursion_depth_++; } 1219 inline void IncrementRecursionDepth() { recursion_depth_++; }
1220 inline void DecrementRecursionDepth() { recursion_depth_--; } 1220 inline void DecrementRecursionDepth() { recursion_depth_--; }
1221 1221
1222 inline bool ignore_case() { return ignore_case_; } 1222 inline bool ignore_case() { return ignore_case_; }
1223 inline bool ascii() { return ascii_; } 1223 inline bool ascii() { return ascii_; }
1224 1224
1225 static const int kNoRegister = -1;
1225 private: 1226 private:
1226 EndNode* accept_; 1227 EndNode* accept_;
1227 int next_register_; 1228 int next_register_;
1228 List<RegExpNode*>* work_list_; 1229 List<RegExpNode*>* work_list_;
1229 int recursion_depth_; 1230 int recursion_depth_;
1230 RegExpMacroAssembler* macro_assembler_; 1231 RegExpMacroAssembler* macro_assembler_;
1231 bool ignore_case_; 1232 bool ignore_case_;
1232 bool ascii_; 1233 bool ascii_;
1233 }; 1234 };
1234 1235
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
1306 bool GenerationVariant::mentions_reg(int reg) { 1307 bool GenerationVariant::mentions_reg(int reg) {
1307 for (DeferredAction* action = actions_; 1308 for (DeferredAction* action = actions_;
1308 action != NULL; 1309 action != NULL;
1309 action = action->next()) { 1310 action = action->next()) {
1310 if (reg == action->reg()) return true; 1311 if (reg == action->reg()) return true;
1311 } 1312 }
1312 return false; 1313 return false;
1313 } 1314 }
1314 1315
1315 1316
1317 bool GenerationVariant::GetStoredPosition(int reg, int* cp_offset) {
1318 ASSERT_EQ(0, *cp_offset);
1319 for (DeferredAction* action = actions_;
1320 action != NULL;
1321 action = action->next()) {
1322 if (reg == action->reg()) {
1323 if (action->type() == ActionNode::STORE_POSITION) {
1324 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
1325 return true;
1326 } else {
1327 return false;
1328 }
1329 }
1330 }
1331 return false;
1332 }
1333
1334
1316 int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) { 1335 int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) {
1317 int max_register = -1; 1336 int max_register = RegExpCompiler::kNoRegister;
1318 for (DeferredAction* action = actions_; 1337 for (DeferredAction* action = actions_;
1319 action != NULL; 1338 action != NULL;
1320 action = action->next()) { 1339 action = action->next()) {
1321 affected_registers->Set(action->reg()); 1340 affected_registers->Set(action->reg());
1322 if (action->reg() > max_register) max_register = action->reg(); 1341 if (action->reg() > max_register) max_register = action->reg();
1323 } 1342 }
1324 return max_register; 1343 return max_register;
1325 } 1344 }
1326 1345
1327 1346
(...skipping 241 matching lines...) Expand 10 before | Expand all | Expand 10 after
1569 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, 1588 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
1570 int position_reg, 1589 int position_reg,
1571 RegExpNode* on_success) { 1590 RegExpNode* on_success) {
1572 ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); 1591 ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
1573 result->data_.u_submatch.stack_pointer_register = stack_reg; 1592 result->data_.u_submatch.stack_pointer_register = stack_reg;
1574 result->data_.u_submatch.current_position_register = position_reg; 1593 result->data_.u_submatch.current_position_register = position_reg;
1575 return result; 1594 return result;
1576 } 1595 }
1577 1596
1578 1597
1598 ActionNode* ActionNode::EmptyMatchCheck(int start_register,
1599 int repetition_register,
1600 int repetition_limit,
1601 RegExpNode* on_success) {
1602 ActionNode* result = new ActionNode(EMPTY_MATCH_CHECK, on_success);
1603 result->data_.u_empty_match_check.start_register = start_register;
1604 result->data_.u_empty_match_check.repetition_register = repetition_register;
1605 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
1606 return result;
1607 }
1608
1609
1579 #define DEFINE_ACCEPT(Type) \ 1610 #define DEFINE_ACCEPT(Type) \
1580 void Type##Node::Accept(NodeVisitor* visitor) { \ 1611 void Type##Node::Accept(NodeVisitor* visitor) { \
1581 visitor->Visit##Type(this); \ 1612 visitor->Visit##Type(this); \
1582 } 1613 }
1583 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 1614 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
1584 #undef DEFINE_ACCEPT 1615 #undef DEFINE_ACCEPT
1585 1616
1586 1617
1587 void LoopChoiceNode::Accept(NodeVisitor* visitor) { 1618 void LoopChoiceNode::Accept(NodeVisitor* visitor) {
1588 visitor->VisitLoopChoice(this); 1619 visitor->VisitLoopChoice(this);
(...skipping 1371 matching lines...) Expand 10 before | Expand all | Expand 10 after
2960 new_variant.add_action(&new_set); 2991 new_variant.add_action(&new_set);
2961 return on_success()->Emit(compiler, &new_variant); 2992 return on_success()->Emit(compiler, &new_variant);
2962 } 2993 }
2963 case BEGIN_SUBMATCH: 2994 case BEGIN_SUBMATCH:
2964 if (!variant->is_trivial()) return variant->Flush(compiler, this); 2995 if (!variant->is_trivial()) return variant->Flush(compiler, this);
2965 assembler->WriteCurrentPositionToRegister( 2996 assembler->WriteCurrentPositionToRegister(
2966 data_.u_submatch.current_position_register, 0); 2997 data_.u_submatch.current_position_register, 0);
2967 assembler->WriteStackPointerToRegister( 2998 assembler->WriteStackPointerToRegister(
2968 data_.u_submatch.stack_pointer_register); 2999 data_.u_submatch.stack_pointer_register);
2969 return on_success()->Emit(compiler, variant); 3000 return on_success()->Emit(compiler, variant);
3001 case EMPTY_MATCH_CHECK: {
3002 int start_pos_reg = data_.u_empty_match_check.start_register;
3003 int stored_pos = 0;
3004 int rep_reg = data_.u_empty_match_check.repetition_register;
3005 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3006 if (!has_minimum &&
Erik Corry 2009/01/08 11:10:16 If we have a minimum but we know that we didn't ma
Christian Plesner Hansen 2009/01/08 12:40:49 Good point, fixed.
3007 variant->GetStoredPosition(start_pos_reg, &stored_pos)) {
3008 // If we know the position offset stored in the start register
3009 // and the current position offset we can just bactrack or
3010 // proceed immediately without checking.
3011 if (stored_pos == variant->cp_offset()) {
3012 assembler->GoTo(variant->backtrack());
3013 return true;
3014 } else {
3015 return on_success()->Emit(compiler, variant);
3016 }
3017 }
3018 if (!variant->is_trivial()) return variant->Flush(compiler, this);
3019 Label skip_empty_check;
3020 // If we have a minimum number of repetitions we check the current
3021 // number first and skip the empty check if it's not enough.
Erik Corry 2009/01/08 11:10:16 Ideally if we matched empty we should 'zoom' up to
Christian Plesner Hansen 2009/01/08 12:40:49 We should but I'm not quite sure how to do that.
3022 if (has_minimum) {
3023 int limit = data_.u_empty_match_check.repetition_limit;
3024 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3025 }
3026 // If the match is empty we bail out, otherwise we fall through
3027 // to the on-success continuation.
3028 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3029 variant->backtrack());
3030 assembler->Bind(&skip_empty_check);
3031 return on_success()->Emit(compiler, variant);
3032 }
2970 case POSITIVE_SUBMATCH_SUCCESS: 3033 case POSITIVE_SUBMATCH_SUCCESS:
2971 if (!variant->is_trivial()) return variant->Flush(compiler, this); 3034 if (!variant->is_trivial()) return variant->Flush(compiler, this);
2972 // TODO(erikcorry): Implement support. 3035 // TODO(erikcorry): Implement support.
2973 if (info()->follows_word_interest || 3036 if (info()->follows_word_interest ||
2974 info()->follows_newline_interest || 3037 info()->follows_newline_interest ||
2975 info()->follows_start_interest) { 3038 info()->follows_start_interest) {
2976 return false; 3039 return false;
2977 } 3040 }
2978 if (info()->at_end) { 3041 if (info()->at_end) {
2979 Label at_end; 3042 Label at_end;
(...skipping 299 matching lines...) Expand 10 before | Expand all | Expand 10 after
3279 stream()->Add("label=\"$%i:=$pos\", shape=octagon", 3342 stream()->Add("label=\"$%i:=$pos\", shape=octagon",
3280 that->data_.u_position_register.reg); 3343 that->data_.u_position_register.reg);
3281 break; 3344 break;
3282 case ActionNode::BEGIN_SUBMATCH: 3345 case ActionNode::BEGIN_SUBMATCH:
3283 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", 3346 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
3284 that->data_.u_submatch.current_position_register); 3347 that->data_.u_submatch.current_position_register);
3285 break; 3348 break;
3286 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: 3349 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
3287 stream()->Add("label=\"escape\", shape=septagon"); 3350 stream()->Add("label=\"escape\", shape=septagon");
3288 break; 3351 break;
3352 case ActionNode::EMPTY_MATCH_CHECK:
3353 stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
3354 that->data_.u_empty_match_check.start_register,
3355 that->data_.u_empty_match_check.repetition_register,
3356 that->data_.u_empty_match_check.repetition_limit);
3357 break;
3289 } 3358 }
3290 stream()->Add("];\n"); 3359 stream()->Add("];\n");
3291 PrintAttributes(that); 3360 PrintAttributes(that);
3292 RegExpNode* successor = that->on_success(); 3361 RegExpNode* successor = that->on_success();
3293 stream()->Add(" n%p -> n%p;\n", that, successor); 3362 stream()->Add(" n%p -> n%p;\n", that, successor);
3294 Visit(successor); 3363 Visit(successor);
3295 } 3364 }
3296 3365
3297 3366
3298 class DispatchTableDumper { 3367 class DispatchTableDumper {
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
3504 // where max_match is zero the parser has removed the quantifier if min was 3573 // where max_match is zero the parser has removed the quantifier if min was
3505 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. 3574 // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
3506 3575
3507 // If we know that we cannot match zero length then things are a little 3576 // If we know that we cannot match zero length then things are a little
3508 // simpler since we don't need to make the special zero length match check 3577 // simpler since we don't need to make the special zero length match check
3509 // from step 2.1. If the min and max are small we can unroll a little in 3578 // from step 2.1. If the min and max are small we can unroll a little in
3510 // this case. 3579 // this case.
3511 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} 3580 static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
3512 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} 3581 static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
3513 if (max == 0) return on_success; // This can happen due to recursion. 3582 if (max == 0) return on_success; // This can happen due to recursion.
3514 if (body->min_match() > 0) { 3583 bool body_can_be_empty = (body->min_match() == 0);
3584 int body_start_reg = RegExpCompiler::kNoRegister;
3585 if (body_can_be_empty) {
3586 body_start_reg = compiler->AllocateRegister();
3587 } else {
3515 if (min > 0 && min <= kMaxUnrolledMinMatches) { 3588 if (min > 0 && min <= kMaxUnrolledMinMatches) {
3516 int new_max = (max == kInfinity) ? max : max - min; 3589 int new_max = (max == kInfinity) ? max : max - min;
3517 // Recurse once to get the loop or optional matches after the fixed ones. 3590 // Recurse once to get the loop or optional matches after the fixed ones.
3518 RegExpNode* answer = 3591 RegExpNode* answer =
3519 ToNode(0, new_max, is_greedy, body, compiler, on_success); 3592 ToNode(0, new_max, is_greedy, body, compiler, on_success);
3520 // Unroll the forced matches from 0 to min. This can cause chains of 3593 // Unroll the forced matches from 0 to min. This can cause chains of
3521 // TextNodes (which the parser does not generate). These should be 3594 // TextNodes (which the parser does not generate). These should be
3522 // combined if it turns out they hinder good code generation. 3595 // combined if it turns out they hinder good code generation.
3523 for (int i = 0; i < min; i++) { 3596 for (int i = 0; i < min; i++) {
3524 answer = body->ToNode(compiler, answer); 3597 answer = body->ToNode(compiler, answer);
(...skipping 16 matching lines...) Expand all
3541 answer))); 3614 answer)));
3542 } 3615 }
3543 answer = alternation; 3616 answer = alternation;
3544 } 3617 }
3545 return answer; 3618 return answer;
3546 } 3619 }
3547 } 3620 }
3548 bool has_min = min > 0; 3621 bool has_min = min > 0;
3549 bool has_max = max < RegExpTree::kInfinity; 3622 bool has_max = max < RegExpTree::kInfinity;
3550 bool needs_counter = has_min || has_max; 3623 bool needs_counter = has_min || has_max;
3551 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; 3624 int reg_ctr = needs_counter
3625 ? compiler->AllocateRegister()
3626 : RegExpCompiler::kNoRegister;
3552 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0); 3627 LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
3553 RegExpNode* loop_return = needs_counter 3628 RegExpNode* loop_return = needs_counter
3554 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) 3629 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
3555 : static_cast<RegExpNode*>(center); 3630 : static_cast<RegExpNode*>(center);
3631 if (body_can_be_empty) {
3632 // If the body can be empty we need to check if it was and then
3633 // backtrack.
3634 loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
3635 reg_ctr,
3636 min,
3637 loop_return);
3638 }
3556 RegExpNode* body_node = body->ToNode(compiler, loop_return); 3639 RegExpNode* body_node = body->ToNode(compiler, loop_return);
3640 if (body_can_be_empty) {
3641 // If the body can be empty we need to store the start position
3642 // so we can bail out if it was empty.
3643 body_node = ActionNode::StorePosition(body_start_reg, body_node);
3644 }
3557 GuardedAlternative body_alt(body_node); 3645 GuardedAlternative body_alt(body_node);
3558 if (has_max) { 3646 if (has_max) {
3559 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); 3647 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
3560 body_alt.AddGuard(body_guard); 3648 body_alt.AddGuard(body_guard);
3561 } 3649 }
3562 GuardedAlternative rest_alt(on_success); 3650 GuardedAlternative rest_alt(on_success);
3563 if (has_min) { 3651 if (has_min) {
3564 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); 3652 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
3565 rest_alt.AddGuard(rest_guard); 3653 rest_alt.AddGuard(rest_guard);
3566 } 3654 }
(...skipping 820 matching lines...) Expand 10 before | Expand all | Expand 10 after
4387 EmbeddedVector<byte, 1024> codes; 4475 EmbeddedVector<byte, 1024> codes;
4388 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4476 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4389 return compiler.Assemble(&macro_assembler, 4477 return compiler.Assemble(&macro_assembler,
4390 node, 4478 node,
4391 data->capture_count, 4479 data->capture_count,
4392 pattern); 4480 pattern);
4393 } 4481 }
4394 4482
4395 4483
4396 }} // namespace v8::internal 4484 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/regexp-macro-assembler.h » ('j') | src/regexp-macro-assembler.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698