| OLD | NEW |
| 1 // Copyright (c) 2015, the Fletch project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Fletch project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE.md file. | 3 // BSD-style license that can be found in the LICENSE.md file. |
| 4 | 4 |
| 5 part of dart.core_patch; | 5 part of dart.core_patch; |
| 6 | 6 |
| 7 class MiniExpLabel { | 7 class MiniExpLabel { |
| 8 // Initially points to -1 to indicate the label is neither linked (used) nor | 8 // Initially points to -1 to indicate the label is neither linked (used) nor |
| 9 // bound (fixed to a location). When the label is linked, but not bound, it | 9 // bound (fixed to a location). When the label is linked, but not bound, it |
| 10 // has a negative value, determined by fixupLocation(l), that indicates the | 10 // has a negative value, determined by fixupLocation(l), that indicates the |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 90 | 90 |
| 91 // Byte codes. | 91 // Byte codes. |
| 92 const GOTO = 0; // label | 92 const GOTO = 0; // label |
| 93 const PUSH_REGISTER = 1; // reg | 93 const PUSH_REGISTER = 1; // reg |
| 94 const PUSH_BACKTRACK = 2; // const | 94 const PUSH_BACKTRACK = 2; // const |
| 95 const POP_REGISTER = 3; // reg | 95 const POP_REGISTER = 3; // reg |
| 96 const BACKTRACK_EQ = 4; // reg reg | 96 const BACKTRACK_EQ = 4; // reg reg |
| 97 const BACKTRACK_NE = 5; // reg reg | 97 const BACKTRACK_NE = 5; // reg reg |
| 98 const BACKTRACK_GT = 6; // reg reg | 98 const BACKTRACK_GT = 6; // reg reg |
| 99 const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset | 99 const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset |
| 100 const BACKTRACK_IF_IN_RANGE = 8; // from to | 100 const BACKTRACK_IF_PREVIOUS_NO_MATCH = 8; // constant-pool-offset |
| 101 const GOTO_IF_MATCH = 9; // charCode label | 101 const BACKTRACK_IF_IN_RANGE = 9; // from to |
| 102 const GOTO_IF_IN_RANGE = 10; // from to label | 102 const BACKTRACK_IF_PREVIOUS_IN_RANGE = 10; // from to |
| 103 const GOTO_EQ = 11; // reg reg label | 103 const GOTO_IF_MATCH = 11; // charCode label |
| 104 const GOTO_GE = 12; // reg reg label | 104 const GOTO_IF_PREVIOUS_MATCH = 12; // charCode label |
| 105 const GOTO_IF_WORD_CHARACTER = 13; // position-offset label | 105 const GOTO_IF_IN_RANGE = 13; // from to label |
| 106 const ADD_TO_REGISTER = 14; // reg const | 106 const GOTO_IF_PREVIOUS_IN_RANGE = 14; // from to label |
| 107 const COPY_REGISTER = 15; // dest-reg source-reg | 107 const GOTO_EQ = 15; // reg reg label |
| 108 const BACKTRACK_ON_BACK_REFERENCE = 16; // capture-reg | 108 const GOTO_GE = 16; // reg reg label |
| 109 const BACKTRACK = 17; | 109 const GOTO_IF_WORD_CHARACTER = 17; // position-offset label |
| 110 const SUCCEED = 18; | 110 const ADD_TO_REGISTER = 18; // reg const |
| 111 const FAIL = 19; | 111 const COPY_REGISTER = 19; // dest-reg source-reg |
| 112 const BACKTRACK_ON_BACK_REFERENCE = 20; // capture-reg |
| 113 const BACKTRACK_ON_BACK_REFERENCE_BACKWARDS = 21; // capture-reg |
| 114 const BACKTRACK = 22; |
| 115 const SUCCEED = 23; |
| 116 const FAIL = 24; |
| 112 | 117 |
| 113 // Format is name, number of register arguments, number of other arguments. | 118 // Format is name, number of register arguments, number of other arguments. |
| 114 const BYTE_CODE_NAMES = const [ | 119 const BYTE_CODE_NAMES = const [ |
| 115 "GOTO", 0, 1, | 120 "GOTO", 0, 1, |
| 116 "PUSH_REGISTER", 1, 0, | 121 "PUSH_REGISTER", 1, 0, |
| 117 "PUSH_BACKTRACK", 0, 1, | 122 "PUSH_BACKTRACK", 0, 1, |
| 118 "POP_REGISTER", 1, 0, | 123 "POP_REGISTER", 1, 0, |
| 119 "BACKTRACK_EQ", 2, 0, | 124 "BACKTRACK_EQ", 2, 0, |
| 120 "BACKTRACK_NE", 2, 0, | 125 "BACKTRACK_NE", 2, 0, |
| 121 "BACKTRACK_GT", 2, 0, | 126 "BACKTRACK_GT", 2, 0, |
| 122 "BACKTRACK_IF_NO_MATCH", 0, 1, | 127 "BACKTRACK_IF_NO_MATCH", 0, 1, |
| 128 "BACKTRACK_IF_PREVIOUS_NO_MATCH", 0, 1, |
| 123 "BACKTRACK_IF_IN_RANGE", 0, 2, | 129 "BACKTRACK_IF_IN_RANGE", 0, 2, |
| 130 "BACKTRACK_IF_PREVIOUS_IN_RANGE", 0, 2, |
| 124 "GOTO_IF_MATCH", 0, 2, | 131 "GOTO_IF_MATCH", 0, 2, |
| 132 "GOTO_IF_PREVIOUS_MATCH", 0, 2, |
| 125 "GOTO_IF_IN_RANGE", 0, 3, | 133 "GOTO_IF_IN_RANGE", 0, 3, |
| 134 "GOTO_IF_PREVIOUS_IN_RANGE", 0, 3, |
| 126 "GOTO_EQ", 2, 1, | 135 "GOTO_EQ", 2, 1, |
| 127 "GOTO_GE", 2, 1, | 136 "GOTO_GE", 2, 1, |
| 128 "GOTO_IF_WORD_CHARACTER", 0, 2, | 137 "GOTO_IF_WORD_CHARACTER", 0, 2, |
| 129 "ADD_TO_REGISTER", 1, 1, | 138 "ADD_TO_REGISTER", 1, 1, |
| 130 "COPY_REGISTER", 2, 0, | 139 "COPY_REGISTER", 2, 0, |
| 131 "BACKTRACK_ON_BACK_REFERENCE", 1, 1, | 140 "BACKTRACK_ON_BACK_REFERENCE", 1, 1, |
| 141 "BACKTRACK_ON_BACK_REFERENCE_BACKWARDS", 1, 1, |
| 132 "BACKTRACK", 0, 0, | 142 "BACKTRACK", 0, 0, |
| 133 "SUCCEED", 0, 0, | 143 "SUCCEED", 0, 0, |
| 134 "FAIL", 0, 0]; | 144 "FAIL", 0, 0]; |
| 135 | 145 |
| 136 const CHAR_CODE_NUL = 0; | 146 const CHAR_CODE_NUL = 0; |
| 137 const CHAR_CODE_BACKSPACE = 8; | 147 const CHAR_CODE_BACKSPACE = 8; |
| 138 const CHAR_CODE_TAB = 9; | 148 const CHAR_CODE_TAB = 9; |
| 139 const CHAR_CODE_NEWLINE = 10; | 149 const CHAR_CODE_NEWLINE = 10; |
| 140 const CHAR_CODE_VERTICAL_TAB = 11; | 150 const CHAR_CODE_VERTICAL_TAB = 11; |
| 141 const CHAR_CODE_FORM_FEED = 12; | 151 const CHAR_CODE_FORM_FEED = 12; |
| 142 const CHAR_CODE_CARRIAGE_RETURN = 13; | 152 const CHAR_CODE_CARRIAGE_RETURN = 13; |
| 143 const CHAR_CODE_SPACE = 32; | 153 const CHAR_CODE_SPACE = 32; |
| 144 const CHAR_CODE_BANG = 33; | 154 const CHAR_CODE_BANG = 33; |
| 145 const CHAR_CODE_ASTERISK = 42; | 155 const CHAR_CODE_ASTERISK = 42; |
| 146 const CHAR_CODE_PLUS = 43; | 156 const CHAR_CODE_PLUS = 43; |
| 147 const CHAR_CODE_COMMA = 44; | 157 const CHAR_CODE_COMMA = 44; |
| 148 const CHAR_CODE_DASH = 45; | 158 const CHAR_CODE_DASH = 45; |
| 149 const CHAR_CODE_0 = 48; | 159 const CHAR_CODE_0 = 48; |
| 150 const CHAR_CODE_7 = 55; | 160 const CHAR_CODE_7 = 55; |
| 151 const CHAR_CODE_9 = 57; | 161 const CHAR_CODE_9 = 57; |
| 152 const CHAR_CODE_COLON = 58; | 162 const CHAR_CODE_COLON = 58; |
| 163 const CHAR_CODE_LESS_THAN = 60; |
| 153 const CHAR_CODE_EQUALS = 61; | 164 const CHAR_CODE_EQUALS = 61; |
| 154 const CHAR_CODE_QUERY = 63; | 165 const CHAR_CODE_QUERY = 63; |
| 155 const CHAR_CODE_UPPER_A = 65; | 166 const CHAR_CODE_UPPER_A = 65; |
| 156 const CHAR_CODE_UPPER_B = 66; | 167 const CHAR_CODE_UPPER_B = 66; |
| 157 const CHAR_CODE_UPPER_D = 68; | 168 const CHAR_CODE_UPPER_D = 68; |
| 158 const CHAR_CODE_UPPER_F = 70; | 169 const CHAR_CODE_UPPER_F = 70; |
| 159 const CHAR_CODE_UPPER_S = 83; | 170 const CHAR_CODE_UPPER_S = 83; |
| 160 const CHAR_CODE_UPPER_W = 87; | 171 const CHAR_CODE_UPPER_W = 87; |
| 161 const CHAR_CODE_UPPER_Z = 90; | 172 const CHAR_CODE_UPPER_Z = 90; |
| 162 const CHAR_CODE_BACKSLASH = 92; | 173 const CHAR_CODE_BACKSLASH = 92; |
| (...skipping 30 matching lines...) Expand all Loading... |
| 193 class MiniExpCompiler { | 204 class MiniExpCompiler { |
| 194 final String pattern; | 205 final String pattern; |
| 195 final bool caseSensitive; | 206 final bool caseSensitive; |
| 196 final List<int> registers = new List<int>(); | 207 final List<int> registers = new List<int>(); |
| 197 int captureRegisterCount = 0; | 208 int captureRegisterCount = 0; |
| 198 int firstCaptureRegister; | 209 int firstCaptureRegister; |
| 199 final List<int> _codes = new List<int>(); | 210 final List<int> _codes = new List<int>(); |
| 200 final List<int> _extraConstants = new List<int>(); | 211 final List<int> _extraConstants = new List<int>(); |
| 201 final List<BackReference> _backReferences = new List<BackReference>(); | 212 final List<BackReference> _backReferences = new List<BackReference>(); |
| 202 MiniExpLabel _pendingGoto; | 213 MiniExpLabel _pendingGoto; |
| 214 bool stepBackwards = false; |
| 215 int get oneIfBackwards => stepBackwards ? 1 : 0; |
| 216 int get direction => stepBackwards ? -1 : 1; |
| 203 | 217 |
| 204 MiniExpCompiler(this.pattern, this.caseSensitive) { | 218 MiniExpCompiler(this.pattern, this.caseSensitive) { |
| 205 for (int i = 0; i < FIXED_REGISTERS; i++) { | 219 for (int i = 0; i < FIXED_REGISTERS; i++) { |
| 206 registers.add(i == NO_POSITION_REGISTER ? NO_POSITION : 0); | 220 registers.add(i == NO_POSITION_REGISTER ? NO_POSITION : 0); |
| 207 } | 221 } |
| 208 } | 222 } |
| 209 | 223 |
| 210 List<int> get codes { | 224 List<int> get codes { |
| 211 flushPendingGoto(); | 225 flushPendingGoto(); |
| 226 //disassemble(_codes); |
| 212 return _codes; | 227 return _codes; |
| 213 } | 228 } |
| 214 | 229 |
| 215 String get constantPool { | 230 String get constantPool { |
| 216 if (_extraConstants.isEmpty) { | 231 if (_extraConstants.isEmpty) { |
| 217 return pattern; | 232 return pattern; |
| 218 } else { | 233 } else { |
| 219 return pattern + new String.fromCharCodes(_extraConstants); | 234 return pattern + new String.fromCharCodes(_extraConstants); |
| 220 } | 235 } |
| 221 } | 236 } |
| (...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 372 void addToRegister(int reg, int offset) { | 387 void addToRegister(int reg, int offset) { |
| 373 _emit(ADD_TO_REGISTER, registerNumber(reg), offset); | 388 _emit(ADD_TO_REGISTER, registerNumber(reg), offset); |
| 374 } | 389 } |
| 375 | 390 |
| 376 void copyRegister(int destRegister, int sourceRegister) { | 391 void copyRegister(int destRegister, int sourceRegister) { |
| 377 _emit(COPY_REGISTER, registerNumber(destRegister), | 392 _emit(COPY_REGISTER, registerNumber(destRegister), |
| 378 registerNumber(sourceRegister)); | 393 registerNumber(sourceRegister)); |
| 379 } | 394 } |
| 380 | 395 |
| 381 void backtrackOnBackReferenceFail(int register, bool caseSensitive) { | 396 void backtrackOnBackReferenceFail(int register, bool caseSensitive) { |
| 382 _emit(BACKTRACK_ON_BACK_REFERENCE, | 397 _emit(BACKTRACK_ON_BACK_REFERENCE + oneIfBackwards, |
| 383 registerNumber(register), caseSensitive ? 1 : 0); | 398 registerNumber(register), caseSensitive ? 1 : 0); |
| 384 } | 399 } |
| 385 | 400 |
| 386 void backtrackIfGreater(int register1, int register2) { | 401 void backtrackIfGreater(int register1, int register2) { |
| 387 _emit(BACKTRACK_GT, registerNumber(register1), registerNumber(register2)); | 402 _emit(BACKTRACK_GT, registerNumber(register1), registerNumber(register2)); |
| 388 } | 403 } |
| 389 | 404 |
| 390 void gotoIfGreaterEqual(int register1, int register2, MiniExpLabel label) { | 405 void gotoIfGreaterEqual(int register1, int register2, MiniExpLabel label) { |
| 391 _emit(GOTO_GE, registerNumber(register1), registerNumber(register2)); | 406 _emit(GOTO_GE, registerNumber(register1), registerNumber(register2)); |
| 392 link(label); | 407 link(label); |
| 393 } | 408 } |
| 394 | 409 |
| 395 void backtrackIfNoMatch(int constant_pool_offset) { | 410 void backtrackIfNoMatch(int constant_pool_offset) { |
| 396 _emit(BACKTRACK_IF_NO_MATCH, constant_pool_offset); | 411 _emit(BACKTRACK_IF_NO_MATCH + oneIfBackwards, constant_pool_offset); |
| 397 } | 412 } |
| 398 | 413 |
| 399 void backtrackIfInRange(int from, int to) { | 414 void backtrackIfInRange(int from, int to) { |
| 400 _emit(BACKTRACK_IF_IN_RANGE, from, to); | 415 _emit(BACKTRACK_IF_IN_RANGE + oneIfBackwards, from, to); |
| 401 } | 416 } |
| 402 | 417 |
| 403 void gotoIfMatches(int charCode, MiniExpLabel label) { | 418 void gotoIfMatches(int charCode, MiniExpLabel label) { |
| 404 _emit(GOTO_IF_MATCH, charCode); | 419 _emit(GOTO_IF_MATCH + oneIfBackwards, charCode); |
| 405 link(label); | 420 link(label); |
| 406 } | 421 } |
| 407 | 422 |
| 408 void gotoIfInRange(int from, int to, MiniExpLabel label) { | 423 void gotoIfInRange(int from, int to, MiniExpLabel label) { |
| 409 if (from == to) { | 424 if (from == to) { |
| 410 gotoIfMatches(from, label); | 425 gotoIfMatches(from, label); |
| 411 } else { | 426 } else { |
| 412 _emit(GOTO_IF_IN_RANGE, from, to); | 427 _emit(GOTO_IF_IN_RANGE + oneIfBackwards, from, to); |
| 413 link(label); | 428 link(label); |
| 414 } | 429 } |
| 415 } | 430 } |
| 416 | 431 |
| 417 void backtrackIfNotAtWordBoundary() { | 432 void backtrackIfNotAtWordBoundary() { |
| 418 MiniExpLabel non_word_on_left = new MiniExpLabel(); | 433 MiniExpLabel non_word_on_left = new MiniExpLabel(); |
| 419 MiniExpLabel word_on_left = new MiniExpLabel(); | 434 MiniExpLabel word_on_left = new MiniExpLabel(); |
| 420 MiniExpLabel at_word_boundary = new MiniExpLabel(); | 435 MiniExpLabel at_word_boundary = new MiniExpLabel(); |
| 421 MiniExpLabel do_backtrack = new MiniExpLabel(); | 436 MiniExpLabel do_backtrack = new MiniExpLabel(); |
| 422 | 437 |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 503 fixedLength = 0, | 518 fixedLength = 0, |
| 504 anchored = false, | 519 anchored = false, |
| 505 registersToSave = null; | 520 registersToSave = null; |
| 506 | 521 |
| 507 const MiniExpAnalysis.atStart() | 522 const MiniExpAnalysis.atStart() |
| 508 : canMatchEmpty = true, | 523 : canMatchEmpty = true, |
| 509 fixedLength = 0, | 524 fixedLength = 0, |
| 510 anchored = true, | 525 anchored = true, |
| 511 registersToSave = null; | 526 registersToSave = null; |
| 512 | 527 |
| 513 MiniExpAnalysis.lookahead(MiniExpAnalysis bodyAnalysis, bool positive) | 528 MiniExpAnalysis.lookahead( |
| 529 MiniExpAnalysis bodyAnalysis, bool positive, bool behind) |
| 514 : canMatchEmpty = true, | 530 : canMatchEmpty = true, |
| 515 fixedLength = 0, | 531 fixedLength = 0, |
| 516 anchored = positive && bodyAnalysis.anchored, | 532 anchored = positive && !behind && bodyAnalysis.anchored, |
| 517 registersToSave = bodyAnalysis.registersToSave; | 533 registersToSave = bodyAnalysis.registersToSave; |
| 518 | 534 |
| 519 MiniExpAnalysis.quantifier( | 535 MiniExpAnalysis.quantifier( |
| 520 MiniExpAnalysis bodyAnalysis, int min, int max, List<int> regs) | 536 MiniExpAnalysis bodyAnalysis, int min, int max, List<int> regs) |
| 521 : canMatchEmpty = min == 0 || bodyAnalysis.canMatchEmpty, | 537 : canMatchEmpty = min == 0 || bodyAnalysis.canMatchEmpty, |
| 522 fixedLength = (min == 1 && max == 1) ? bodyAnalysis.fixedLength : null, | 538 fixedLength = (min == 1 && max == 1) ? bodyAnalysis.fixedLength : null, |
| 523 anchored = min > 0 && bodyAnalysis.anchored, | 539 anchored = min > 0 && bodyAnalysis.anchored, |
| 524 registersToSave = combineRegisters(bodyAnalysis.registersToSave, regs); | 540 registersToSave = combineRegisters(bodyAnalysis.registersToSave, regs); |
| 525 | 541 |
| 526 const MiniExpAnalysis.atom() | 542 const MiniExpAnalysis.atom() |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 594 } | 610 } |
| 595 } | 611 } |
| 596 | 612 |
| 597 class Alternative extends MiniExpAst { | 613 class Alternative extends MiniExpAst { |
| 598 final MiniExpAst _left; | 614 final MiniExpAst _left; |
| 599 final MiniExpAst _right; | 615 final MiniExpAst _right; |
| 600 | 616 |
| 601 Alternative(this._left, this._right); | 617 Alternative(this._left, this._right); |
| 602 | 618 |
| 603 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 619 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
| 604 compiler.generate(_left, _right.label); | 620 MiniExpAst first = compiler.stepBackwards ? _right : _left; |
| 605 compiler.generate(_right, onSuccess); | 621 MiniExpAst second = compiler.stepBackwards ? _left : _right; |
| 622 |
| 623 compiler.generate(first, second.label); |
| 624 compiler.generate(second, onSuccess); |
| 606 } | 625 } |
| 607 | 626 |
| 608 MiniExpAnalysis analyze(MiniExpCompiler compiler) { | 627 MiniExpAnalysis analyze(MiniExpCompiler compiler) { |
| 609 return new MiniExpAnalysis.and( | 628 return new MiniExpAnalysis.and( |
| 610 _left.analyze(compiler), _right.analyze(compiler)); | 629 _left.analyze(compiler), _right.analyze(compiler)); |
| 611 } | 630 } |
| 612 } | 631 } |
| 613 | 632 |
| 614 abstract class Assertion extends MiniExpAst { | 633 abstract class Assertion extends MiniExpAst { |
| 615 MiniExpAnalysis analyze(MiniExpCompiler) => const MiniExpAnalysis.empty(); | 634 MiniExpAnalysis analyze(MiniExpCompiler) => const MiniExpAnalysis.empty(); |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 683 compiler.pop(CURRENT_POSITION); | 702 compiler.pop(CURRENT_POSITION); |
| 684 compiler.pop(CURRENT_POSITION); | 703 compiler.pop(CURRENT_POSITION); |
| 685 // This overwrites the current position with the correct value. | 704 // This overwrites the current position with the correct value. |
| 686 compiler.backtrack(); | 705 compiler.backtrack(); |
| 687 } | 706 } |
| 688 } | 707 } |
| 689 } | 708 } |
| 690 | 709 |
| 691 class LookAhead extends Assertion { | 710 class LookAhead extends Assertion { |
| 692 final bool _positive; | 711 final bool _positive; |
| 712 final bool _behind; |
| 693 final MiniExpAst _body; | 713 final MiniExpAst _body; |
| 694 List<int> _subtreeRegisters; | 714 List<int> _subtreeRegisters; |
| 695 | 715 |
| 696 int _savedStackPointerRegister; | 716 int _savedStackPointerRegister; |
| 697 int _savedPosition; | 717 int _savedPosition; |
| 698 | 718 |
| 699 LookAhead(this._positive, this._body, MiniExpCompiler compiler) { | 719 LookAhead( |
| 720 this._positive, this._behind, this._body, MiniExpCompiler compiler) { |
| 700 _savedStackPointerRegister = compiler.allocateWorkingRegister(); | 721 _savedStackPointerRegister = compiler.allocateWorkingRegister(); |
| 701 _savedPosition = compiler.allocateWorkingRegister(); | 722 _savedPosition = compiler.allocateWorkingRegister(); |
| 702 } | 723 } |
| 703 | 724 |
| 704 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 725 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
| 705 // Lookahead. Even if the subexpression succeeds, the current position is | 726 // Lookahead. Even if the subexpression succeeds, the current position is |
| 706 // reset, and the backtracking stack is unwound so that we can never | 727 // reset, and the backtracking stack is unwound so that we can never |
| 707 // backtrack into the lookahead. On a failure of the subexpression, the | 728 // backtrack into the lookahead. On a failure of the subexpression, the |
| 708 // stack will be naturally unwound. | 729 // stack will be naturally unwound. |
| 709 MiniExpLabel body_succeeded = new MiniExpLabel(); | 730 MiniExpLabel body_succeeded = new MiniExpLabel(); |
| 710 MiniExpLabel succeed_on_failure = new MiniExpLabel(); | 731 MiniExpLabel succeed_on_failure = new MiniExpLabel(); |
| 711 MiniExpLabel undoCaptures; | 732 MiniExpLabel undoCaptures; |
| 712 compiler.copyRegister(_savedStackPointerRegister, STACK_POINTER); | 733 compiler.copyRegister(_savedStackPointerRegister, STACK_POINTER); |
| 713 compiler.copyRegister(_savedPosition, CURRENT_POSITION); | 734 compiler.copyRegister(_savedPosition, CURRENT_POSITION); |
| 714 if (!_positive) { | 735 if (!_positive) { |
| 715 compiler.pushBacktrack(succeed_on_failure); | 736 compiler.pushBacktrack(succeed_on_failure); |
| 716 } | 737 } |
| 738 bool stepBackwards = compiler.stepBackwards; |
| 739 compiler.stepBackwards = _behind; |
| 717 compiler.generate(_body, body_succeeded); | 740 compiler.generate(_body, body_succeeded); |
| 741 compiler.stepBackwards = stepBackwards; |
| 718 | 742 |
| 719 compiler.bind(body_succeeded); | 743 compiler.bind(body_succeeded); |
| 720 compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister); | 744 compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister); |
| 721 compiler.copyRegister(CURRENT_POSITION, _savedPosition); | 745 compiler.copyRegister(CURRENT_POSITION, _savedPosition); |
| 722 if (!_positive) { | 746 if (!_positive) { |
| 723 // For negative lookahead always zap the captures when the body succeeds | 747 // For negative lookahead always zap the captures when the body succeeds |
| 724 // and the lookahead thus fails. The captures are only needed for any | 748 // and the lookahead thus fails. The captures are only needed for any |
| 725 // backrefs inside the negative lookahead. | 749 // backrefs inside the negative lookahead. |
| 726 if (_subtreeRegisters != null) { | 750 if (_subtreeRegisters != null) { |
| 727 for (int register in _subtreeRegisters) { | 751 for (int register in _subtreeRegisters) { |
| (...skipping 20 matching lines...) Expand all Loading... |
| 748 for (int register in _subtreeRegisters) { | 772 for (int register in _subtreeRegisters) { |
| 749 compiler.copyRegister(register, NO_POSITION_REGISTER); | 773 compiler.copyRegister(register, NO_POSITION_REGISTER); |
| 750 } | 774 } |
| 751 compiler.backtrack(); | 775 compiler.backtrack(); |
| 752 } | 776 } |
| 753 } | 777 } |
| 754 | 778 |
| 755 MiniExpAnalysis analyze(compiler) { | 779 MiniExpAnalysis analyze(compiler) { |
| 756 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); | 780 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); |
| 757 _subtreeRegisters = bodyAnalysis.registersToSave; | 781 _subtreeRegisters = bodyAnalysis.registersToSave; |
| 758 return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive); | 782 return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive, _behind); |
| 759 } | 783 } |
| 760 } | 784 } |
| 761 | 785 |
| 762 class Quantifier extends MiniExpAst { | 786 class Quantifier extends MiniExpAst { |
| 763 final int _min; | 787 final int _min; |
| 764 final int _max; | 788 final int _max; |
| 765 final bool _greedy; | 789 final bool _greedy; |
| 766 final MiniExpAst _body; | 790 final MiniExpAst _body; |
| 767 int _counterRegister; | 791 int _counterRegister; |
| 768 int _startOfMatchRegister; // Implements 21.2.2.5.1 note 4. | 792 int _startOfMatchRegister; // Implements 21.2.2.5.1 note 4. |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 841 void generateFixedLengthGreedy( | 865 void generateFixedLengthGreedy( |
| 842 MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 866 MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
| 843 MiniExpLabel newIteration = new MiniExpLabel(); | 867 MiniExpLabel newIteration = new MiniExpLabel(); |
| 844 MiniExpLabel cantAdvanceMore = new MiniExpLabel(); | 868 MiniExpLabel cantAdvanceMore = new MiniExpLabel(); |
| 845 MiniExpLabel continuationFailed = new MiniExpLabel(); | 869 MiniExpLabel continuationFailed = new MiniExpLabel(); |
| 846 | 870 |
| 847 // Save the current position, so we know when the quantifier has been | 871 // Save the current position, so we know when the quantifier has been |
| 848 // unwound enough. | 872 // unwound enough. |
| 849 compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION); | 873 compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION); |
| 850 if (_min != 0) { | 874 if (_min != 0) { |
| 851 compiler.addToRegister(_optimizedGreedyRegister, _min * _bodyLength); | 875 int offset = compiler.direction * _bodyLength; |
| 876 compiler.addToRegister(_optimizedGreedyRegister, _min * offset); |
| 852 } | 877 } |
| 853 | 878 |
| 854 // This backtrack doesn't trigger until the quantifier has eaten as much as | 879 // This backtrack doesn't trigger until the quantifier has eaten as much as |
| 855 // possible. Unfortunately, whenever we backtrack in this simple system, | 880 // possible. Unfortunately, whenever we backtrack in this simple system, |
| 856 // the old current position in the subject string is also rewound. That's | 881 // the old current position in the subject string is also rewound. That's |
| 857 // not so convenient here, so we will have to save the current position in | 882 // not so convenient here, so we will have to save the current position in |
| 858 // a special register. It would be faster to generate the body in a | 883 // a special register. It would be faster to generate the body in a |
| 859 // special mode where we use a different backtrack instruction that just | 884 // special mode where we use a different backtrack instruction that just |
| 860 // rewinds the current position a certain number of characters instead of | 885 // rewinds the current position a certain number of characters instead of |
| 861 // popping it. | 886 // popping it. |
| 862 compiler.pushBacktrack(cantAdvanceMore); | 887 compiler.pushBacktrack(cantAdvanceMore); |
| 863 | 888 |
| 864 // The loop. | 889 // The loop. |
| 865 compiler.bind(newIteration); | 890 compiler.bind(newIteration); |
| 866 compiler.copyRegister(_savePositionRegister, CURRENT_POSITION); | 891 compiler.copyRegister(_savePositionRegister, CURRENT_POSITION); |
| 867 compiler.generate(_body, newIteration); | 892 compiler.generate(_body, newIteration); |
| 868 | 893 |
| 869 // The greedy quantifier has eaten as much as it can. Time to try the | 894 // The greedy quantifier has eaten as much as it can. Time to try the |
| 870 // continuation of the regexp after the quantifier. | 895 // continuation of the regexp after the quantifier. |
| 871 compiler.bind(cantAdvanceMore); | 896 compiler.bind(cantAdvanceMore); |
| 872 | 897 |
| 873 if (_min != 0) { | 898 if (_min != 0) { |
| 874 compiler.backtrackIfGreater( | 899 if (compiler.stepBackwards) { |
| 875 _optimizedGreedyRegister, _savePositionRegister); | 900 compiler.backtrackIfGreater( |
| 901 _savePositionRegister, _optimizedGreedyRegister); |
| 902 } else { |
| 903 compiler.backtrackIfGreater( |
| 904 _optimizedGreedyRegister, _savePositionRegister); |
| 905 } |
| 876 } | 906 } |
| 877 | 907 |
| 878 compiler.addToRegister(_savePositionRegister, _bodyLength); | 908 compiler.addToRegister( |
| 909 _savePositionRegister, compiler.direction * _bodyLength); |
| 879 compiler.copyRegister(CURRENT_POSITION, _savePositionRegister); | 910 compiler.copyRegister(CURRENT_POSITION, _savePositionRegister); |
| 880 | 911 |
| 881 // The continuation of the regexp failed. We backtrack the greedy | 912 // The continuation of the regexp failed. We backtrack the greedy |
| 882 // quantifier by one step and retry. | 913 // quantifier by one step and retry. |
| 883 compiler.bind(continuationFailed); | 914 compiler.bind(continuationFailed); |
| 884 compiler.addToRegister(CURRENT_POSITION, -_bodyLength); | 915 compiler.addToRegister( |
| 916 CURRENT_POSITION, compiler.direction * -_bodyLength); |
| 885 // If we got back to where the quantifier started matching, then jump | 917 // If we got back to where the quantifier started matching, then jump |
| 886 // to the continuation (we haven't pushed a backtrack, so if that fails, it | 918 // to the continuation (we haven't pushed a backtrack, so if that fails, it |
| 887 // will backtrack further). | 919 // will backtrack further). |
| 888 // We don't have gotoIfEqual, so use gotoIfGreaterEqual. | 920 // We don't have gotoIfEqual, so use gotoIfGreaterEqual. |
| 889 compiler.gotoIfGreaterEqual( | 921 if (compiler.stepBackwards) { |
| 890 _optimizedGreedyRegister, CURRENT_POSITION, onSuccess); | 922 compiler.gotoIfGreaterEqual( |
| 923 CURRENT_POSITION, _optimizedGreedyRegister, onSuccess); |
| 924 } else { |
| 925 compiler.gotoIfGreaterEqual( |
| 926 _optimizedGreedyRegister, CURRENT_POSITION, onSuccess); |
| 927 } |
| 891 compiler.pushBacktrack(continuationFailed); | 928 compiler.pushBacktrack(continuationFailed); |
| 892 compiler.goto(onSuccess); | 929 compiler.goto(onSuccess); |
| 893 } | 930 } |
| 894 | 931 |
| 895 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 932 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
| 896 // We optimize loops of the form .* to avoid big backtrack stacks. | 933 // We optimize loops of the form .* to avoid big backtrack stacks. |
| 897 if (_isOptimizedGreedy) { | 934 if (_isOptimizedGreedy) { |
| 898 generateFixedLengthGreedy(compiler, onSuccess); | 935 generateFixedLengthGreedy(compiler, onSuccess); |
| 899 return; | 936 return; |
| 900 } | 937 } |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 995 _subtreeRegistersThatNeedSaving.isNotEmpty; | 1032 _subtreeRegistersThatNeedSaving.isNotEmpty; |
| 996 } | 1033 } |
| 997 } | 1034 } |
| 998 | 1035 |
| 999 class Atom extends MiniExpAst { | 1036 class Atom extends MiniExpAst { |
| 1000 final int _constantIndex; | 1037 final int _constantIndex; |
| 1001 | 1038 |
| 1002 Atom(this._constantIndex); | 1039 Atom(this._constantIndex); |
| 1003 | 1040 |
| 1004 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 1041 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
| 1005 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); | 1042 if (compiler.stepBackwards) { |
| 1043 compiler.backtrackIfEqual(CURRENT_POSITION, 0); |
| 1044 } else { |
| 1045 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
| 1046 } |
| 1006 MiniExpLabel match; | 1047 MiniExpLabel match; |
| 1007 int charCode = compiler.constantPoolEntry(_constantIndex); | 1048 int charCode = compiler.constantPoolEntry(_constantIndex); |
| 1008 if (!compiler.caseSensitive) { | 1049 if (!compiler.caseSensitive) { |
| 1009 List<int> equivalents = internalRegExpEquivalenceClass(charCode); | 1050 List<int> equivalents = internalRegExpEquivalenceClass(charCode); |
| 1010 if (equivalents != null && equivalents.length > 1) { | 1051 if (equivalents != null && equivalents.length > 1) { |
| 1011 match = new MiniExpLabel(); | 1052 match = new MiniExpLabel(); |
| 1012 for (int equivalent in equivalents) { | 1053 for (int equivalent in equivalents) { |
| 1013 if (equivalent == charCode) continue; | 1054 if (equivalent == charCode) continue; |
| 1014 compiler.gotoIfMatches(equivalent, match); | 1055 compiler.gotoIfMatches(equivalent, match); |
| 1015 } | 1056 } |
| 1016 } | 1057 } |
| 1017 } | 1058 } |
| 1018 compiler.backtrackIfNoMatch(_constantIndex); | 1059 compiler.backtrackIfNoMatch(_constantIndex); |
| 1019 if (match != null) compiler.bind(match); | 1060 if (match != null) compiler.bind(match); |
| 1020 compiler.addToRegister(CURRENT_POSITION, 1); | 1061 compiler.addToRegister(CURRENT_POSITION, compiler.direction); |
| 1021 compiler.goto(onSuccess); | 1062 compiler.goto(onSuccess); |
| 1022 } | 1063 } |
| 1023 | 1064 |
| 1024 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); | 1065 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); |
| 1025 } | 1066 } |
| 1026 | 1067 |
| 1027 class CharClass extends MiniExpAst { | 1068 class CharClass extends MiniExpAst { |
| 1028 final List<int> _ranges = new List<int>(); | 1069 final List<int> _ranges = new List<int>(); |
| 1029 final bool _positive; | 1070 final bool _positive; |
| 1030 | 1071 |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1112 } | 1153 } |
| 1113 } | 1154 } |
| 1114 ranges.add(start); | 1155 ranges.add(start); |
| 1115 ranges.add(end); | 1156 ranges.add(end); |
| 1116 } | 1157 } |
| 1117 // TODO(erikcorry): Sort and merge ranges. | 1158 // TODO(erikcorry): Sort and merge ranges. |
| 1118 return ranges; | 1159 return ranges; |
| 1119 } | 1160 } |
| 1120 | 1161 |
| 1121 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 1162 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
| 1122 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); | 1163 if (compiler.stepBackwards) { |
| 1164 compiler.backtrackIfEqual(CURRENT_POSITION, 0); |
| 1165 } else { |
| 1166 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
| 1167 } |
| 1123 List<int> ranges = _ranges; | 1168 List<int> ranges = _ranges; |
| 1124 if (!compiler.caseSensitive) { | 1169 if (!compiler.caseSensitive) { |
| 1125 ranges = caseInsensitiveRanges(_ranges); | 1170 ranges = caseInsensitiveRanges(_ranges); |
| 1126 } | 1171 } |
| 1127 MiniExpLabel match = new MiniExpLabel(); | 1172 MiniExpLabel match = new MiniExpLabel(); |
| 1128 if (_positive) { | 1173 if (_positive) { |
| 1129 for (int i = 0; i < ranges.length; i += 2) { | 1174 for (int i = 0; i < ranges.length; i += 2) { |
| 1130 compiler.gotoIfInRange(ranges[i], ranges[i + 1], match); | 1175 compiler.gotoIfInRange(ranges[i], ranges[i + 1], match); |
| 1131 } | 1176 } |
| 1132 compiler.backtrack(); | 1177 compiler.backtrack(); |
| 1133 compiler.bind(match); | 1178 compiler.bind(match); |
| 1134 } else { | 1179 } else { |
| 1135 for (int i = 0; i < ranges.length; i += 2) { | 1180 for (int i = 0; i < ranges.length; i += 2) { |
| 1136 compiler.backtrackIfInRange(ranges[i], ranges[i + 1]); | 1181 compiler.backtrackIfInRange(ranges[i], ranges[i + 1]); |
| 1137 } | 1182 } |
| 1138 } | 1183 } |
| 1139 compiler.addToRegister(CURRENT_POSITION, 1); | 1184 compiler.addToRegister(CURRENT_POSITION, compiler.direction); |
| 1140 compiler.goto(onSuccess); | 1185 compiler.goto(onSuccess); |
| 1141 } | 1186 } |
| 1142 | 1187 |
| 1143 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); | 1188 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); |
| 1144 } | 1189 } |
| 1145 | 1190 |
| 1146 // This class is used for all backslashes followed by numbers. For web | 1191 // This class is used for all backslashes followed by numbers. For web |
| 1147 // compatibility, if the number (interpreted as decimal) is smaller than the | 1192 // compatibility, if the number (interpreted as decimal) is smaller than the |
| 1148 // number of captures, then it will be interpreted as a back reference. | 1193 // number of captures, then it will be interpreted as a back reference. |
| 1149 // Otherwise the number will be interpreted as an octal character code escape. | 1194 // Otherwise the number will be interpreted as an octal character code escape. |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1191 if (_startRegister == null) { | 1236 if (_startRegister == null) { |
| 1192 _startRegister = compiler.allocateCaptureRegisters(); | 1237 _startRegister = compiler.allocateCaptureRegisters(); |
| 1193 _endRegister = _startRegister - 1; | 1238 _endRegister = _startRegister - 1; |
| 1194 } | 1239 } |
| 1195 } | 1240 } |
| 1196 | 1241 |
| 1197 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 1242 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
| 1198 MiniExpLabel undoStart = new MiniExpLabel(); | 1243 MiniExpLabel undoStart = new MiniExpLabel(); |
| 1199 MiniExpLabel writeEnd = new MiniExpLabel(); | 1244 MiniExpLabel writeEnd = new MiniExpLabel(); |
| 1200 MiniExpLabel undoEnd = new MiniExpLabel(); | 1245 MiniExpLabel undoEnd = new MiniExpLabel(); |
| 1201 compiler.copyRegister(_startRegister, CURRENT_POSITION); | 1246 int firstRegister = compiler.stepBackwards ? _endRegister : _startRegister; |
| 1247 int lastRegister = compiler.stepBackwards ? _startRegister : _endRegister; |
| 1248 compiler.copyRegister(firstRegister, CURRENT_POSITION); |
| 1202 compiler.pushBacktrack(undoStart); | 1249 compiler.pushBacktrack(undoStart); |
| 1203 | 1250 |
| 1204 compiler.generate(_body, writeEnd); | 1251 compiler.generate(_body, writeEnd); |
| 1205 | 1252 |
| 1206 compiler.bind(writeEnd); | 1253 compiler.bind(writeEnd); |
| 1207 compiler.copyRegister(_endRegister, CURRENT_POSITION); | 1254 compiler.copyRegister(lastRegister, CURRENT_POSITION); |
| 1208 compiler.pushBacktrack(undoEnd); | 1255 compiler.pushBacktrack(undoEnd); |
| 1209 compiler.goto(onSuccess); | 1256 compiler.goto(onSuccess); |
| 1210 | 1257 |
| 1211 compiler.bind(undoStart); | 1258 compiler.bind(undoStart); |
| 1212 compiler.copyRegister(_startRegister, NO_POSITION_REGISTER); | 1259 compiler.copyRegister(firstRegister, NO_POSITION_REGISTER); |
| 1213 compiler.backtrack(); | 1260 compiler.backtrack(); |
| 1214 | 1261 |
| 1215 compiler.bind(undoEnd); | 1262 compiler.bind(undoEnd); |
| 1216 compiler.copyRegister(_endRegister, NO_POSITION_REGISTER); | 1263 compiler.copyRegister(lastRegister, NO_POSITION_REGISTER); |
| 1217 compiler.backtrack(); | 1264 compiler.backtrack(); |
| 1218 } | 1265 } |
| 1219 | 1266 |
| 1220 MiniExpAnalysis analyze(MiniExpCompiler compiler) { | 1267 MiniExpAnalysis analyze(MiniExpCompiler compiler) { |
| 1221 allocateRegisters(compiler); | 1268 allocateRegisters(compiler); |
| 1222 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); | 1269 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); |
| 1223 return new MiniExpAnalysis.capture( | 1270 return new MiniExpAnalysis.capture( |
| 1224 bodyAnalysis, _startRegister, _endRegister); | 1271 bodyAnalysis, _startRegister, _endRegister); |
| 1225 } | 1272 } |
| 1226 } | 1273 } |
| (...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1416 notWordBoundary, | 1463 notWordBoundary, |
| 1417 wordCharacter, | 1464 wordCharacter, |
| 1418 notWordCharacter, | 1465 notWordCharacter, |
| 1419 digit, | 1466 digit, |
| 1420 notDigit, | 1467 notDigit, |
| 1421 whitespace, | 1468 whitespace, |
| 1422 notWhitespace, | 1469 notWhitespace, |
| 1423 nonCapturing, | 1470 nonCapturing, |
| 1424 lookAhead, | 1471 lookAhead, |
| 1425 negativeLookAhead, | 1472 negativeLookAhead, |
| 1473 lookBehind, |
| 1474 negativeLookBehind, |
| 1426 other | 1475 other |
| 1427 } | 1476 } |
| 1428 | 1477 |
| 1429 class MiniExpParser { | 1478 class MiniExpParser { |
| 1430 // The constant pool is used to look up character data when the regexp is | 1479 // The constant pool is used to look up character data when the regexp is |
| 1431 // running. It consists of the regexp source with some characters appended | 1480 // running. It consists of the regexp source with some characters appended |
| 1432 // to handle escapes that are not literally present in the regexp input. | 1481 // to handle escapes that are not literally present in the regexp input. |
| 1433 final MiniExpCompiler _compiler; | 1482 final MiniExpCompiler _compiler; |
| 1434 final String _source; | 1483 final String _source; |
| 1435 final bool _isMultiLine; | 1484 final bool _isMultiLine; |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1493 | 1542 |
| 1494 MiniExpAst tryParseAssertion() { | 1543 MiniExpAst tryParseAssertion() { |
| 1495 if (acceptToken(Token.hat)) { | 1544 if (acceptToken(Token.hat)) { |
| 1496 return _isMultiLine ? new AtBeginningOfLine() : new AtStart(); | 1545 return _isMultiLine ? new AtBeginningOfLine() : new AtStart(); |
| 1497 } | 1546 } |
| 1498 if (acceptToken(Token.dollar)) { | 1547 if (acceptToken(Token.dollar)) { |
| 1499 return _isMultiLine ? new AtEndOfLine() : new AtEnd(); | 1548 return _isMultiLine ? new AtEndOfLine() : new AtEnd(); |
| 1500 } | 1549 } |
| 1501 if (acceptToken(Token.wordBoundary)) return new WordBoundary(true); | 1550 if (acceptToken(Token.wordBoundary)) return new WordBoundary(true); |
| 1502 if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false); | 1551 if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false); |
| 1503 var lookaheadAst; | 1552 var lookAst; |
| 1504 if (acceptToken(Token.lookAhead)) { | 1553 if (acceptToken(Token.lookAhead)) { |
| 1505 lookaheadAst = new LookAhead(true, parseDisjunction(), _compiler); | 1554 lookAst = new LookAhead(true, false, parseDisjunction(), _compiler); |
| 1506 } else if (acceptToken(Token.negativeLookAhead)) { | 1555 } else if (acceptToken(Token.negativeLookAhead)) { |
| 1507 lookaheadAst = new LookAhead(false, parseDisjunction(), _compiler); | 1556 lookAst = new LookAhead(false, false, parseDisjunction(), _compiler); |
| 1557 } else if (acceptToken(Token.lookBehind)) { |
| 1558 lookAst = new LookAhead(true, true, parseDisjunction(), _compiler); |
| 1559 } else if (acceptToken(Token.negativeLookBehind)) { |
| 1560 lookAst = new LookAhead(false, true, parseDisjunction(), _compiler); |
| 1508 } | 1561 } |
| 1509 if (lookaheadAst != null) { | 1562 if (lookAst != null) { |
| 1510 expectToken(Token.rParen); | 1563 expectToken(Token.rParen); |
| 1511 // The normal syntax does not allow a quantifier here, but the web | 1564 // The normal syntax does not allow a quantifier here, but the web |
| 1512 // compatible one does. Slightly nasty hack for compatibility: | 1565 // compatible one does. Slightly nasty hack for compatibility: |
| 1513 if (peekToken(Token.quant)) { | 1566 if (peekToken(Token.quant)) { |
| 1514 MiniExpAst quant = new Quantifier( | 1567 MiniExpAst quant = new Quantifier( |
| 1515 _minimumRepeats, _maximumRepeats, _lastWasGreedy, | 1568 _minimumRepeats, _maximumRepeats, _lastWasGreedy, |
| 1516 lookaheadAst, _compiler); | 1569 lookAst, _compiler); |
| 1517 expectToken(Token.quant); | 1570 expectToken(Token.quant); |
| 1518 return quant; | 1571 return quant; |
| 1519 } | 1572 } |
| 1520 return lookaheadAst; | 1573 return lookAst; |
| 1521 } | 1574 } |
| 1522 return null; | 1575 return null; |
| 1523 } | 1576 } |
| 1524 | 1577 |
| 1525 MiniExpAst parseTerm() { | 1578 MiniExpAst parseTerm() { |
| 1526 MiniExpAst ast = tryParseAssertion(); | 1579 MiniExpAst ast = tryParseAssertion(); |
| 1527 if (ast == null) { | 1580 if (ast == null) { |
| 1528 ast = parseAtom(); | 1581 ast = parseAtom(); |
| 1529 if (peekToken(Token.quant)) { | 1582 if (peekToken(Token.quant)) { |
| 1530 MiniExpAst quant = new Quantifier( | 1583 MiniExpAst quant = new Quantifier( |
| (...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1929 if (!_has(_position + 1)) error("unterminated group"); | 1982 if (!_has(_position + 1)) error("unterminated group"); |
| 1930 if (_at(_position + 1) == CHAR_CODE_QUERY) { | 1983 if (_at(_position + 1) == CHAR_CODE_QUERY) { |
| 1931 if (!_has(_position + 2)) error("unterminated group"); | 1984 if (!_has(_position + 2)) error("unterminated group"); |
| 1932 int parenthesisModifier = _at(_position + 2); | 1985 int parenthesisModifier = _at(_position + 2); |
| 1933 if (parenthesisModifier == CHAR_CODE_EQUALS) { | 1986 if (parenthesisModifier == CHAR_CODE_EQUALS) { |
| 1934 _lastToken = Token.lookAhead; | 1987 _lastToken = Token.lookAhead; |
| 1935 } else if (parenthesisModifier == CHAR_CODE_COLON) { | 1988 } else if (parenthesisModifier == CHAR_CODE_COLON) { |
| 1936 _lastToken = Token.nonCapturing; | 1989 _lastToken = Token.nonCapturing; |
| 1937 } else if (parenthesisModifier == CHAR_CODE_BANG) { | 1990 } else if (parenthesisModifier == CHAR_CODE_BANG) { |
| 1938 _lastToken = Token.negativeLookAhead; | 1991 _lastToken = Token.negativeLookAhead; |
| 1992 } else if (parenthesisModifier == CHAR_CODE_LESS_THAN) { |
| 1993 if (!_has(_position + 3)) error("unterminated group"); |
| 1994 int lookBehindModifier = _at(_position + 3); |
| 1995 _position++; |
| 1996 if (lookBehindModifier == CHAR_CODE_EQUALS) { |
| 1997 _lastToken = Token.lookBehind; |
| 1998 } else if (lookBehindModifier == CHAR_CODE_BANG) { |
| 1999 _lastToken = Token.negativeLookBehind; |
| 2000 } else { |
| 2001 error("invalid group"); |
| 2002 } |
| 1939 } else { | 2003 } else { |
| 1940 error("invalid group"); | 2004 error("invalid group"); |
| 1941 } | 2005 } |
| 1942 _position += 2; | 2006 _position += 2; |
| 1943 return; | 2007 return; |
| 1944 } | 2008 } |
| 1945 } | 2009 } |
| 1946 | 2010 |
| 1947 void lexQuantifier() { | 2011 void lexQuantifier() { |
| 1948 int quantifierCode = _at(_position); | 2012 int quantifierCode = _at(_position); |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2107 break; | 2171 break; |
| 2108 case BACKTRACK_GT: | 2172 case BACKTRACK_GT: |
| 2109 int reg1 = _registers[_byteCodes[programCounter++]]; | 2173 int reg1 = _registers[_byteCodes[programCounter++]]; |
| 2110 int reg2 = _registers[_byteCodes[programCounter++]]; | 2174 int reg2 = _registers[_byteCodes[programCounter++]]; |
| 2111 if (reg1 > reg2) { | 2175 if (reg1 > reg2) { |
| 2112 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2176 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
| 2113 programCounter = stack[--stackPointer]; | 2177 programCounter = stack[--stackPointer]; |
| 2114 } | 2178 } |
| 2115 break; | 2179 break; |
| 2116 case BACKTRACK_IF_NO_MATCH: | 2180 case BACKTRACK_IF_NO_MATCH: |
| 2117 if (subject.codeUnitAt(_registers[CURRENT_POSITION]) != | 2181 case BACKTRACK_IF_PREVIOUS_NO_MATCH: |
| 2182 int offset = byteCode - BACKTRACK_IF_NO_MATCH; |
| 2183 if (subject.codeUnitAt(_registers[CURRENT_POSITION] - offset) != |
| 2118 _constantPool.codeUnitAt(_byteCodes[programCounter++])) { | 2184 _constantPool.codeUnitAt(_byteCodes[programCounter++])) { |
| 2119 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2185 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
| 2120 programCounter = stack[--stackPointer]; | 2186 programCounter = stack[--stackPointer]; |
| 2121 } | 2187 } |
| 2122 break; | 2188 break; |
| 2123 case BACKTRACK_IF_IN_RANGE: | 2189 case BACKTRACK_IF_IN_RANGE: |
| 2124 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); | 2190 case BACKTRACK_IF_PREVIOUS_IN_RANGE: |
| 2191 int offset = byteCode - BACKTRACK_IF_IN_RANGE; |
| 2192 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
| 2125 int from = _byteCodes[programCounter++]; | 2193 int from = _byteCodes[programCounter++]; |
| 2126 int to = _byteCodes[programCounter++]; | 2194 int to = _byteCodes[programCounter++]; |
| 2127 if (from <= code && code <= to) { | 2195 if (from <= code && code <= to) { |
| 2128 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2196 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
| 2129 programCounter = stack[--stackPointer]; | 2197 programCounter = stack[--stackPointer]; |
| 2130 } | 2198 } |
| 2131 break; | 2199 break; |
| 2132 case GOTO_IF_MATCH: | 2200 case GOTO_IF_MATCH: |
| 2133 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); | 2201 case GOTO_IF_PREVIOUS_MATCH: |
| 2202 int offset = byteCode - GOTO_IF_MATCH; |
| 2203 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
| 2134 int expected = _byteCodes[programCounter++]; | 2204 int expected = _byteCodes[programCounter++]; |
| 2135 int dest = _byteCodes[programCounter++]; | 2205 int dest = _byteCodes[programCounter++]; |
| 2136 if (code == expected) programCounter = dest; | 2206 if (code == expected) programCounter = dest; |
| 2137 break; | 2207 break; |
| 2138 case GOTO_IF_IN_RANGE: | 2208 case GOTO_IF_IN_RANGE: |
| 2139 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); | 2209 case GOTO_IF_PREVIOUS_IN_RANGE: |
| 2210 int offset = byteCode - GOTO_IF_IN_RANGE; |
| 2211 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
| 2140 int from = _byteCodes[programCounter++]; | 2212 int from = _byteCodes[programCounter++]; |
| 2141 int to = _byteCodes[programCounter++]; | 2213 int to = _byteCodes[programCounter++]; |
| 2142 int dest = _byteCodes[programCounter++]; | 2214 int dest = _byteCodes[programCounter++]; |
| 2143 if (from <= code && code <= to) programCounter = dest; | 2215 if (from <= code && code <= to) programCounter = dest; |
| 2144 break; | 2216 break; |
| 2145 case GOTO_EQ: | 2217 case GOTO_EQ: |
| 2146 int reg1 = _registers[_byteCodes[programCounter++]]; | 2218 int reg1 = _registers[_byteCodes[programCounter++]]; |
| 2147 int reg2 = _registers[_byteCodes[programCounter++]]; | 2219 int reg2 = _registers[_byteCodes[programCounter++]]; |
| 2148 int dest = _byteCodes[programCounter++]; | 2220 int dest = _byteCodes[programCounter++]; |
| 2149 if (reg1 == reg2) programCounter = dest; | 2221 if (reg1 == reg2) programCounter = dest; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2181 case COPY_REGISTER: | 2253 case COPY_REGISTER: |
| 2182 // We don't normally keep the stack pointer in sync with its slot in | 2254 // We don't normally keep the stack pointer in sync with its slot in |
| 2183 // the _registers, but we have to have it in sync here. | 2255 // the _registers, but we have to have it in sync here. |
| 2184 _registers[STACK_POINTER] = stackPointer; | 2256 _registers[STACK_POINTER] = stackPointer; |
| 2185 int registerIndex = _byteCodes[programCounter++]; | 2257 int registerIndex = _byteCodes[programCounter++]; |
| 2186 int value = _registers[_byteCodes[programCounter++]]; | 2258 int value = _registers[_byteCodes[programCounter++]]; |
| 2187 _registers[registerIndex] = value; | 2259 _registers[registerIndex] = value; |
| 2188 stackPointer = _registers[STACK_POINTER]; | 2260 stackPointer = _registers[STACK_POINTER]; |
| 2189 break; | 2261 break; |
| 2190 case BACKTRACK_ON_BACK_REFERENCE: | 2262 case BACKTRACK_ON_BACK_REFERENCE: |
| 2263 case BACKTRACK_ON_BACK_REFERENCE_BACKWARDS: |
| 2191 int registerIndex = _byteCodes[programCounter++]; | 2264 int registerIndex = _byteCodes[programCounter++]; |
| 2192 bool case_sensitive = _byteCodes[programCounter++] != 0; | 2265 bool case_sensitive = _byteCodes[programCounter++] != 0; |
| 2193 if (!checkBackReference(subject, case_sensitive, registerIndex)) { | 2266 if (!checkBackReference( |
| 2267 subject, case_sensitive, registerIndex, |
| 2268 byteCode == BACKTRACK_ON_BACK_REFERENCE_BACKWARDS)) { |
| 2194 // Backtrack. | 2269 // Backtrack. |
| 2195 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2270 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
| 2196 programCounter = stack[--stackPointer]; | 2271 programCounter = stack[--stackPointer]; |
| 2197 } | 2272 } |
| 2198 break; | 2273 break; |
| 2199 case BACKTRACK: | 2274 case BACKTRACK: |
| 2200 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2275 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
| 2201 programCounter = stack[--stackPointer]; | 2276 programCounter = stack[--stackPointer]; |
| 2202 break; | 2277 break; |
| 2203 case SUCCEED: | 2278 case SUCCEED: |
| 2204 return true; | 2279 return true; |
| 2205 case FAIL: | 2280 case FAIL: |
| 2206 return false; | 2281 return false; |
| 2207 default: | 2282 default: |
| 2208 assert(false); | 2283 assert(false); |
| 2209 break; | 2284 break; |
| 2210 } | 2285 } |
| 2211 } | 2286 } |
| 2212 } | 2287 } |
| 2213 | 2288 |
| 2214 bool checkBackReference( | 2289 bool checkBackReference( |
| 2215 String subject, bool caseSensitive, int registerIndex) { | 2290 String subject, bool caseSensitive, int registerIndex, bool backwards) { |
| 2216 int start = _registers[registerIndex]; | 2291 int start = _registers[registerIndex]; |
| 2217 int end = _registers[registerIndex + 1]; | 2292 int end = _registers[registerIndex + 1]; |
| 2218 if (end == NO_POSITION) return true; | 2293 if (end == NO_POSITION) return true; |
| 2219 int length = end - start; | 2294 int length = end - start; |
| 2220 int currentPosition = _registers[CURRENT_POSITION]; | 2295 int currentPosition = _registers[CURRENT_POSITION]; |
| 2221 if (currentPosition + end - start > subject.length) return false; | 2296 if (backwards) { |
| 2297 if (currentPosition < length) return false; |
| 2298 currentPosition -= length; |
| 2299 } else { |
| 2300 if (currentPosition + length > subject.length) return false; |
| 2301 } |
| 2222 for (int i = 0; i < length; i++) { | 2302 for (int i = 0; i < length; i++) { |
| 2223 int x = subject.codeUnitAt(start + i); | 2303 int x = subject.codeUnitAt(start + i); |
| 2224 int y = subject.codeUnitAt(currentPosition + i); | 2304 int y = subject.codeUnitAt(currentPosition + i); |
| 2225 if (!caseSensitive) { | 2305 if (!caseSensitive) { |
| 2226 x = internalRegExpCanonicalize(x); | 2306 x = internalRegExpCanonicalize(x); |
| 2227 y = internalRegExpCanonicalize(y); | 2307 y = internalRegExpCanonicalize(y); |
| 2228 } | 2308 } |
| 2229 if (x != y) return false; | 2309 if (x != y) return false; |
| 2230 } | 2310 } |
| 2231 _registers[CURRENT_POSITION] += length; | 2311 _registers[CURRENT_POSITION] += backwards ? -length : length; |
| 2232 return true; | 2312 return true; |
| 2233 } | 2313 } |
| 2234 } | 2314 } |
| OLD | NEW |