Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(318)

Side by Side Diff: lib/core/regexp.dart

Issue 1398033002: Implement lookbehind in regexp engine Base URL: git@github.com:dart-lang/fletch.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tests/unsorted/lookbehind_test.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2015, the Fletch project authors. Please see the AUTHORS file 1 // Copyright (c) 2015, the Fletch project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE.md file. 3 // BSD-style license that can be found in the LICENSE.md file.
4 4
5 part of dart.core_patch; 5 part of dart.core_patch;
6 6
7 class MiniExpLabel { 7 class MiniExpLabel {
8 // Initially points to -1 to indicate the label is neither linked (used) nor 8 // Initially points to -1 to indicate the label is neither linked (used) nor
9 // bound (fixed to a location). When the label is linked, but not bound, it 9 // bound (fixed to a location). When the label is linked, but not bound, it
10 // has a negative value, determined by fixupLocation(l), that indicates the 10 // has a negative value, determined by fixupLocation(l), that indicates the
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
90 90
91 // Byte codes. 91 // Byte codes.
92 const GOTO = 0; // label 92 const GOTO = 0; // label
93 const PUSH_REGISTER = 1; // reg 93 const PUSH_REGISTER = 1; // reg
94 const PUSH_BACKTRACK = 2; // const 94 const PUSH_BACKTRACK = 2; // const
95 const POP_REGISTER = 3; // reg 95 const POP_REGISTER = 3; // reg
96 const BACKTRACK_EQ = 4; // reg reg 96 const BACKTRACK_EQ = 4; // reg reg
97 const BACKTRACK_NE = 5; // reg reg 97 const BACKTRACK_NE = 5; // reg reg
98 const BACKTRACK_GT = 6; // reg reg 98 const BACKTRACK_GT = 6; // reg reg
99 const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset 99 const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset
100 const BACKTRACK_IF_IN_RANGE = 8; // from to 100 const BACKTRACK_IF_PREVIOUS_NO_MATCH = 8; // constant-pool-offset
101 const GOTO_IF_MATCH = 9; // charCode label 101 const BACKTRACK_IF_IN_RANGE = 9; // from to
102 const GOTO_IF_IN_RANGE = 10; // from to label 102 const BACKTRACK_IF_PREVIOUS_IN_RANGE = 10; // from to
103 const GOTO_EQ = 11; // reg reg label 103 const GOTO_IF_MATCH = 11; // charCode label
104 const GOTO_GE = 12; // reg reg label 104 const GOTO_IF_PREVIOUS_MATCH = 12; // charCode label
105 const GOTO_IF_WORD_CHARACTER = 13; // position-offset label 105 const GOTO_IF_IN_RANGE = 13; // from to label
106 const ADD_TO_REGISTER = 14; // reg const 106 const GOTO_IF_PREVIOUS_IN_RANGE = 14; // from to label
107 const COPY_REGISTER = 15; // dest-reg source-reg 107 const GOTO_EQ = 15; // reg reg label
108 const BACKTRACK_ON_BACK_REFERENCE = 16; // capture-reg 108 const GOTO_GE = 16; // reg reg label
109 const BACKTRACK = 17; 109 const GOTO_IF_WORD_CHARACTER = 17; // position-offset label
110 const SUCCEED = 18; 110 const ADD_TO_REGISTER = 18; // reg const
111 const FAIL = 19; 111 const COPY_REGISTER = 19; // dest-reg source-reg
112 const BACKTRACK_ON_BACK_REFERENCE = 20; // capture-reg
113 const BACKTRACK_ON_BACK_REFERENCE_BACKWARDS = 21; // capture-reg
114 const BACKTRACK = 22;
115 const SUCCEED = 23;
116 const FAIL = 24;
112 117
113 // Format is name, number of register arguments, number of other arguments. 118 // Format is name, number of register arguments, number of other arguments.
114 const BYTE_CODE_NAMES = const [ 119 const BYTE_CODE_NAMES = const [
115 "GOTO", 0, 1, 120 "GOTO", 0, 1,
116 "PUSH_REGISTER", 1, 0, 121 "PUSH_REGISTER", 1, 0,
117 "PUSH_BACKTRACK", 0, 1, 122 "PUSH_BACKTRACK", 0, 1,
118 "POP_REGISTER", 1, 0, 123 "POP_REGISTER", 1, 0,
119 "BACKTRACK_EQ", 2, 0, 124 "BACKTRACK_EQ", 2, 0,
120 "BACKTRACK_NE", 2, 0, 125 "BACKTRACK_NE", 2, 0,
121 "BACKTRACK_GT", 2, 0, 126 "BACKTRACK_GT", 2, 0,
122 "BACKTRACK_IF_NO_MATCH", 0, 1, 127 "BACKTRACK_IF_NO_MATCH", 0, 1,
128 "BACKTRACK_IF_PREVIOUS_NO_MATCH", 0, 1,
123 "BACKTRACK_IF_IN_RANGE", 0, 2, 129 "BACKTRACK_IF_IN_RANGE", 0, 2,
130 "BACKTRACK_IF_PREVIOUS_IN_RANGE", 0, 2,
124 "GOTO_IF_MATCH", 0, 2, 131 "GOTO_IF_MATCH", 0, 2,
132 "GOTO_IF_PREVIOUS_MATCH", 0, 2,
125 "GOTO_IF_IN_RANGE", 0, 3, 133 "GOTO_IF_IN_RANGE", 0, 3,
134 "GOTO_IF_PREVIOUS_IN_RANGE", 0, 3,
126 "GOTO_EQ", 2, 1, 135 "GOTO_EQ", 2, 1,
127 "GOTO_GE", 2, 1, 136 "GOTO_GE", 2, 1,
128 "GOTO_IF_WORD_CHARACTER", 0, 2, 137 "GOTO_IF_WORD_CHARACTER", 0, 2,
129 "ADD_TO_REGISTER", 1, 1, 138 "ADD_TO_REGISTER", 1, 1,
130 "COPY_REGISTER", 2, 0, 139 "COPY_REGISTER", 2, 0,
131 "BACKTRACK_ON_BACK_REFERENCE", 1, 1, 140 "BACKTRACK_ON_BACK_REFERENCE", 1, 1,
141 "BACKTRACK_ON_BACK_REFERENCE_BACKWARDS", 1, 1,
132 "BACKTRACK", 0, 0, 142 "BACKTRACK", 0, 0,
133 "SUCCEED", 0, 0, 143 "SUCCEED", 0, 0,
134 "FAIL", 0, 0]; 144 "FAIL", 0, 0];
135 145
136 const CHAR_CODE_NUL = 0; 146 const CHAR_CODE_NUL = 0;
137 const CHAR_CODE_BACKSPACE = 8; 147 const CHAR_CODE_BACKSPACE = 8;
138 const CHAR_CODE_TAB = 9; 148 const CHAR_CODE_TAB = 9;
139 const CHAR_CODE_NEWLINE = 10; 149 const CHAR_CODE_NEWLINE = 10;
140 const CHAR_CODE_VERTICAL_TAB = 11; 150 const CHAR_CODE_VERTICAL_TAB = 11;
141 const CHAR_CODE_FORM_FEED = 12; 151 const CHAR_CODE_FORM_FEED = 12;
142 const CHAR_CODE_CARRIAGE_RETURN = 13; 152 const CHAR_CODE_CARRIAGE_RETURN = 13;
143 const CHAR_CODE_SPACE = 32; 153 const CHAR_CODE_SPACE = 32;
144 const CHAR_CODE_BANG = 33; 154 const CHAR_CODE_BANG = 33;
145 const CHAR_CODE_ASTERISK = 42; 155 const CHAR_CODE_ASTERISK = 42;
146 const CHAR_CODE_PLUS = 43; 156 const CHAR_CODE_PLUS = 43;
147 const CHAR_CODE_COMMA = 44; 157 const CHAR_CODE_COMMA = 44;
148 const CHAR_CODE_DASH = 45; 158 const CHAR_CODE_DASH = 45;
149 const CHAR_CODE_0 = 48; 159 const CHAR_CODE_0 = 48;
150 const CHAR_CODE_7 = 55; 160 const CHAR_CODE_7 = 55;
151 const CHAR_CODE_9 = 57; 161 const CHAR_CODE_9 = 57;
152 const CHAR_CODE_COLON = 58; 162 const CHAR_CODE_COLON = 58;
163 const CHAR_CODE_LESS_THAN = 60;
153 const CHAR_CODE_EQUALS = 61; 164 const CHAR_CODE_EQUALS = 61;
154 const CHAR_CODE_QUERY = 63; 165 const CHAR_CODE_QUERY = 63;
155 const CHAR_CODE_UPPER_A = 65; 166 const CHAR_CODE_UPPER_A = 65;
156 const CHAR_CODE_UPPER_B = 66; 167 const CHAR_CODE_UPPER_B = 66;
157 const CHAR_CODE_UPPER_D = 68; 168 const CHAR_CODE_UPPER_D = 68;
158 const CHAR_CODE_UPPER_F = 70; 169 const CHAR_CODE_UPPER_F = 70;
159 const CHAR_CODE_UPPER_S = 83; 170 const CHAR_CODE_UPPER_S = 83;
160 const CHAR_CODE_UPPER_W = 87; 171 const CHAR_CODE_UPPER_W = 87;
161 const CHAR_CODE_UPPER_Z = 90; 172 const CHAR_CODE_UPPER_Z = 90;
162 const CHAR_CODE_BACKSLASH = 92; 173 const CHAR_CODE_BACKSLASH = 92;
(...skipping 30 matching lines...) Expand all
193 class MiniExpCompiler { 204 class MiniExpCompiler {
194 final String pattern; 205 final String pattern;
195 final bool caseSensitive; 206 final bool caseSensitive;
196 final List<int> registers = new List<int>(); 207 final List<int> registers = new List<int>();
197 int captureRegisterCount = 0; 208 int captureRegisterCount = 0;
198 int firstCaptureRegister; 209 int firstCaptureRegister;
199 final List<int> _codes = new List<int>(); 210 final List<int> _codes = new List<int>();
200 final List<int> _extraConstants = new List<int>(); 211 final List<int> _extraConstants = new List<int>();
201 final List<BackReference> _backReferences = new List<BackReference>(); 212 final List<BackReference> _backReferences = new List<BackReference>();
202 MiniExpLabel _pendingGoto; 213 MiniExpLabel _pendingGoto;
214 bool stepBackwards = false;
215 int get oneIfBackwards => stepBackwards ? 1 : 0;
216 int get direction => stepBackwards ? -1 : 1;
203 217
204 MiniExpCompiler(this.pattern, this.caseSensitive) { 218 MiniExpCompiler(this.pattern, this.caseSensitive) {
205 for (int i = 0; i < FIXED_REGISTERS; i++) { 219 for (int i = 0; i < FIXED_REGISTERS; i++) {
206 registers.add(i == NO_POSITION_REGISTER ? NO_POSITION : 0); 220 registers.add(i == NO_POSITION_REGISTER ? NO_POSITION : 0);
207 } 221 }
208 } 222 }
209 223
210 List<int> get codes { 224 List<int> get codes {
211 flushPendingGoto(); 225 flushPendingGoto();
226 //disassemble(_codes);
212 return _codes; 227 return _codes;
213 } 228 }
214 229
215 String get constantPool { 230 String get constantPool {
216 if (_extraConstants.isEmpty) { 231 if (_extraConstants.isEmpty) {
217 return pattern; 232 return pattern;
218 } else { 233 } else {
219 return pattern + new String.fromCharCodes(_extraConstants); 234 return pattern + new String.fromCharCodes(_extraConstants);
220 } 235 }
221 } 236 }
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
372 void addToRegister(int reg, int offset) { 387 void addToRegister(int reg, int offset) {
373 _emit(ADD_TO_REGISTER, registerNumber(reg), offset); 388 _emit(ADD_TO_REGISTER, registerNumber(reg), offset);
374 } 389 }
375 390
376 void copyRegister(int destRegister, int sourceRegister) { 391 void copyRegister(int destRegister, int sourceRegister) {
377 _emit(COPY_REGISTER, registerNumber(destRegister), 392 _emit(COPY_REGISTER, registerNumber(destRegister),
378 registerNumber(sourceRegister)); 393 registerNumber(sourceRegister));
379 } 394 }
380 395
381 void backtrackOnBackReferenceFail(int register, bool caseSensitive) { 396 void backtrackOnBackReferenceFail(int register, bool caseSensitive) {
382 _emit(BACKTRACK_ON_BACK_REFERENCE, 397 _emit(BACKTRACK_ON_BACK_REFERENCE + oneIfBackwards,
383 registerNumber(register), caseSensitive ? 1 : 0); 398 registerNumber(register), caseSensitive ? 1 : 0);
384 } 399 }
385 400
386 void backtrackIfGreater(int register1, int register2) { 401 void backtrackIfGreater(int register1, int register2) {
387 _emit(BACKTRACK_GT, registerNumber(register1), registerNumber(register2)); 402 _emit(BACKTRACK_GT, registerNumber(register1), registerNumber(register2));
388 } 403 }
389 404
390 void gotoIfGreaterEqual(int register1, int register2, MiniExpLabel label) { 405 void gotoIfGreaterEqual(int register1, int register2, MiniExpLabel label) {
391 _emit(GOTO_GE, registerNumber(register1), registerNumber(register2)); 406 _emit(GOTO_GE, registerNumber(register1), registerNumber(register2));
392 link(label); 407 link(label);
393 } 408 }
394 409
395 void backtrackIfNoMatch(int constant_pool_offset) { 410 void backtrackIfNoMatch(int constant_pool_offset) {
396 _emit(BACKTRACK_IF_NO_MATCH, constant_pool_offset); 411 _emit(BACKTRACK_IF_NO_MATCH + oneIfBackwards, constant_pool_offset);
397 } 412 }
398 413
399 void backtrackIfInRange(int from, int to) { 414 void backtrackIfInRange(int from, int to) {
400 _emit(BACKTRACK_IF_IN_RANGE, from, to); 415 _emit(BACKTRACK_IF_IN_RANGE + oneIfBackwards, from, to);
401 } 416 }
402 417
403 void gotoIfMatches(int charCode, MiniExpLabel label) { 418 void gotoIfMatches(int charCode, MiniExpLabel label) {
404 _emit(GOTO_IF_MATCH, charCode); 419 _emit(GOTO_IF_MATCH + oneIfBackwards, charCode);
405 link(label); 420 link(label);
406 } 421 }
407 422
408 void gotoIfInRange(int from, int to, MiniExpLabel label) { 423 void gotoIfInRange(int from, int to, MiniExpLabel label) {
409 if (from == to) { 424 if (from == to) {
410 gotoIfMatches(from, label); 425 gotoIfMatches(from, label);
411 } else { 426 } else {
412 _emit(GOTO_IF_IN_RANGE, from, to); 427 _emit(GOTO_IF_IN_RANGE + oneIfBackwards, from, to);
413 link(label); 428 link(label);
414 } 429 }
415 } 430 }
416 431
417 void backtrackIfNotAtWordBoundary() { 432 void backtrackIfNotAtWordBoundary() {
418 MiniExpLabel non_word_on_left = new MiniExpLabel(); 433 MiniExpLabel non_word_on_left = new MiniExpLabel();
419 MiniExpLabel word_on_left = new MiniExpLabel(); 434 MiniExpLabel word_on_left = new MiniExpLabel();
420 MiniExpLabel at_word_boundary = new MiniExpLabel(); 435 MiniExpLabel at_word_boundary = new MiniExpLabel();
421 MiniExpLabel do_backtrack = new MiniExpLabel(); 436 MiniExpLabel do_backtrack = new MiniExpLabel();
422 437
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
503 fixedLength = 0, 518 fixedLength = 0,
504 anchored = false, 519 anchored = false,
505 registersToSave = null; 520 registersToSave = null;
506 521
507 const MiniExpAnalysis.atStart() 522 const MiniExpAnalysis.atStart()
508 : canMatchEmpty = true, 523 : canMatchEmpty = true,
509 fixedLength = 0, 524 fixedLength = 0,
510 anchored = true, 525 anchored = true,
511 registersToSave = null; 526 registersToSave = null;
512 527
513 MiniExpAnalysis.lookahead(MiniExpAnalysis bodyAnalysis, bool positive) 528 MiniExpAnalysis.lookahead(
529 MiniExpAnalysis bodyAnalysis, bool positive, bool behind)
514 : canMatchEmpty = true, 530 : canMatchEmpty = true,
515 fixedLength = 0, 531 fixedLength = 0,
516 anchored = positive && bodyAnalysis.anchored, 532 anchored = positive && !behind && bodyAnalysis.anchored,
517 registersToSave = bodyAnalysis.registersToSave; 533 registersToSave = bodyAnalysis.registersToSave;
518 534
519 MiniExpAnalysis.quantifier( 535 MiniExpAnalysis.quantifier(
520 MiniExpAnalysis bodyAnalysis, int min, int max, List<int> regs) 536 MiniExpAnalysis bodyAnalysis, int min, int max, List<int> regs)
521 : canMatchEmpty = min == 0 || bodyAnalysis.canMatchEmpty, 537 : canMatchEmpty = min == 0 || bodyAnalysis.canMatchEmpty,
522 fixedLength = (min == 1 && max == 1) ? bodyAnalysis.fixedLength : null, 538 fixedLength = (min == 1 && max == 1) ? bodyAnalysis.fixedLength : null,
523 anchored = min > 0 && bodyAnalysis.anchored, 539 anchored = min > 0 && bodyAnalysis.anchored,
524 registersToSave = combineRegisters(bodyAnalysis.registersToSave, regs); 540 registersToSave = combineRegisters(bodyAnalysis.registersToSave, regs);
525 541
526 const MiniExpAnalysis.atom() 542 const MiniExpAnalysis.atom()
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
594 } 610 }
595 } 611 }
596 612
597 class Alternative extends MiniExpAst { 613 class Alternative extends MiniExpAst {
598 final MiniExpAst _left; 614 final MiniExpAst _left;
599 final MiniExpAst _right; 615 final MiniExpAst _right;
600 616
601 Alternative(this._left, this._right); 617 Alternative(this._left, this._right);
602 618
603 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { 619 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
604 compiler.generate(_left, _right.label); 620 MiniExpAst first = compiler.stepBackwards ? _right : _left;
605 compiler.generate(_right, onSuccess); 621 MiniExpAst second = compiler.stepBackwards ? _left : _right;
622
623 compiler.generate(first, second.label);
624 compiler.generate(second, onSuccess);
606 } 625 }
607 626
608 MiniExpAnalysis analyze(MiniExpCompiler compiler) { 627 MiniExpAnalysis analyze(MiniExpCompiler compiler) {
609 return new MiniExpAnalysis.and( 628 return new MiniExpAnalysis.and(
610 _left.analyze(compiler), _right.analyze(compiler)); 629 _left.analyze(compiler), _right.analyze(compiler));
611 } 630 }
612 } 631 }
613 632
614 abstract class Assertion extends MiniExpAst { 633 abstract class Assertion extends MiniExpAst {
615 MiniExpAnalysis analyze(MiniExpCompiler) => const MiniExpAnalysis.empty(); 634 MiniExpAnalysis analyze(MiniExpCompiler) => const MiniExpAnalysis.empty();
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
683 compiler.pop(CURRENT_POSITION); 702 compiler.pop(CURRENT_POSITION);
684 compiler.pop(CURRENT_POSITION); 703 compiler.pop(CURRENT_POSITION);
685 // This overwrites the current position with the correct value. 704 // This overwrites the current position with the correct value.
686 compiler.backtrack(); 705 compiler.backtrack();
687 } 706 }
688 } 707 }
689 } 708 }
690 709
691 class LookAhead extends Assertion { 710 class LookAhead extends Assertion {
692 final bool _positive; 711 final bool _positive;
712 final bool _behind;
693 final MiniExpAst _body; 713 final MiniExpAst _body;
694 List<int> _subtreeRegisters; 714 List<int> _subtreeRegisters;
695 715
696 int _savedStackPointerRegister; 716 int _savedStackPointerRegister;
697 int _savedPosition; 717 int _savedPosition;
698 718
699 LookAhead(this._positive, this._body, MiniExpCompiler compiler) { 719 LookAhead(
720 this._positive, this._behind, this._body, MiniExpCompiler compiler) {
700 _savedStackPointerRegister = compiler.allocateWorkingRegister(); 721 _savedStackPointerRegister = compiler.allocateWorkingRegister();
701 _savedPosition = compiler.allocateWorkingRegister(); 722 _savedPosition = compiler.allocateWorkingRegister();
702 } 723 }
703 724
704 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { 725 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
705 // Lookahead. Even if the subexpression succeeds, the current position is 726 // Lookahead. Even if the subexpression succeeds, the current position is
706 // reset, and the backtracking stack is unwound so that we can never 727 // reset, and the backtracking stack is unwound so that we can never
707 // backtrack into the lookahead. On a failure of the subexpression, the 728 // backtrack into the lookahead. On a failure of the subexpression, the
708 // stack will be naturally unwound. 729 // stack will be naturally unwound.
709 MiniExpLabel body_succeeded = new MiniExpLabel(); 730 MiniExpLabel body_succeeded = new MiniExpLabel();
710 MiniExpLabel succeed_on_failure = new MiniExpLabel(); 731 MiniExpLabel succeed_on_failure = new MiniExpLabel();
711 MiniExpLabel undoCaptures; 732 MiniExpLabel undoCaptures;
712 compiler.copyRegister(_savedStackPointerRegister, STACK_POINTER); 733 compiler.copyRegister(_savedStackPointerRegister, STACK_POINTER);
713 compiler.copyRegister(_savedPosition, CURRENT_POSITION); 734 compiler.copyRegister(_savedPosition, CURRENT_POSITION);
714 if (!_positive) { 735 if (!_positive) {
715 compiler.pushBacktrack(succeed_on_failure); 736 compiler.pushBacktrack(succeed_on_failure);
716 } 737 }
738 bool stepBackwards = compiler.stepBackwards;
739 compiler.stepBackwards = _behind;
717 compiler.generate(_body, body_succeeded); 740 compiler.generate(_body, body_succeeded);
741 compiler.stepBackwards = stepBackwards;
718 742
719 compiler.bind(body_succeeded); 743 compiler.bind(body_succeeded);
720 compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister); 744 compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister);
721 compiler.copyRegister(CURRENT_POSITION, _savedPosition); 745 compiler.copyRegister(CURRENT_POSITION, _savedPosition);
722 if (!_positive) { 746 if (!_positive) {
723 // For negative lookahead always zap the captures when the body succeeds 747 // For negative lookahead always zap the captures when the body succeeds
724 // and the lookahead thus fails. The captures are only needed for any 748 // and the lookahead thus fails. The captures are only needed for any
725 // backrefs inside the negative lookahead. 749 // backrefs inside the negative lookahead.
726 if (_subtreeRegisters != null) { 750 if (_subtreeRegisters != null) {
727 for (int register in _subtreeRegisters) { 751 for (int register in _subtreeRegisters) {
(...skipping 20 matching lines...) Expand all
748 for (int register in _subtreeRegisters) { 772 for (int register in _subtreeRegisters) {
749 compiler.copyRegister(register, NO_POSITION_REGISTER); 773 compiler.copyRegister(register, NO_POSITION_REGISTER);
750 } 774 }
751 compiler.backtrack(); 775 compiler.backtrack();
752 } 776 }
753 } 777 }
754 778
755 MiniExpAnalysis analyze(compiler) { 779 MiniExpAnalysis analyze(compiler) {
756 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); 780 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler);
757 _subtreeRegisters = bodyAnalysis.registersToSave; 781 _subtreeRegisters = bodyAnalysis.registersToSave;
758 return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive); 782 return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive, _behind);
759 } 783 }
760 } 784 }
761 785
762 class Quantifier extends MiniExpAst { 786 class Quantifier extends MiniExpAst {
763 final int _min; 787 final int _min;
764 final int _max; 788 final int _max;
765 final bool _greedy; 789 final bool _greedy;
766 final MiniExpAst _body; 790 final MiniExpAst _body;
767 int _counterRegister; 791 int _counterRegister;
768 int _startOfMatchRegister; // Implements 21.2.2.5.1 note 4. 792 int _startOfMatchRegister; // Implements 21.2.2.5.1 note 4.
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
841 void generateFixedLengthGreedy( 865 void generateFixedLengthGreedy(
842 MiniExpCompiler compiler, MiniExpLabel onSuccess) { 866 MiniExpCompiler compiler, MiniExpLabel onSuccess) {
843 MiniExpLabel newIteration = new MiniExpLabel(); 867 MiniExpLabel newIteration = new MiniExpLabel();
844 MiniExpLabel cantAdvanceMore = new MiniExpLabel(); 868 MiniExpLabel cantAdvanceMore = new MiniExpLabel();
845 MiniExpLabel continuationFailed = new MiniExpLabel(); 869 MiniExpLabel continuationFailed = new MiniExpLabel();
846 870
847 // Save the current position, so we know when the quantifier has been 871 // Save the current position, so we know when the quantifier has been
848 // unwound enough. 872 // unwound enough.
849 compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION); 873 compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION);
850 if (_min != 0) { 874 if (_min != 0) {
851 compiler.addToRegister(_optimizedGreedyRegister, _min * _bodyLength); 875 int offset = compiler.direction * _bodyLength;
876 compiler.addToRegister(_optimizedGreedyRegister, _min * offset);
852 } 877 }
853 878
854 // This backtrack doesn't trigger until the quantifier has eaten as much as 879 // This backtrack doesn't trigger until the quantifier has eaten as much as
855 // possible. Unfortunately, whenever we backtrack in this simple system, 880 // possible. Unfortunately, whenever we backtrack in this simple system,
856 // the old current position in the subject string is also rewound. That's 881 // the old current position in the subject string is also rewound. That's
857 // not so convenient here, so we will have to save the current position in 882 // not so convenient here, so we will have to save the current position in
858 // a special register. It would be faster to generate the body in a 883 // a special register. It would be faster to generate the body in a
859 // special mode where we use a different backtrack instruction that just 884 // special mode where we use a different backtrack instruction that just
860 // rewinds the current position a certain number of characters instead of 885 // rewinds the current position a certain number of characters instead of
861 // popping it. 886 // popping it.
862 compiler.pushBacktrack(cantAdvanceMore); 887 compiler.pushBacktrack(cantAdvanceMore);
863 888
864 // The loop. 889 // The loop.
865 compiler.bind(newIteration); 890 compiler.bind(newIteration);
866 compiler.copyRegister(_savePositionRegister, CURRENT_POSITION); 891 compiler.copyRegister(_savePositionRegister, CURRENT_POSITION);
867 compiler.generate(_body, newIteration); 892 compiler.generate(_body, newIteration);
868 893
869 // The greedy quantifier has eaten as much as it can. Time to try the 894 // The greedy quantifier has eaten as much as it can. Time to try the
870 // continuation of the regexp after the quantifier. 895 // continuation of the regexp after the quantifier.
871 compiler.bind(cantAdvanceMore); 896 compiler.bind(cantAdvanceMore);
872 897
873 if (_min != 0) { 898 if (_min != 0) {
874 compiler.backtrackIfGreater( 899 if (compiler.stepBackwards) {
875 _optimizedGreedyRegister, _savePositionRegister); 900 compiler.backtrackIfGreater(
901 _savePositionRegister, _optimizedGreedyRegister);
902 } else {
903 compiler.backtrackIfGreater(
904 _optimizedGreedyRegister, _savePositionRegister);
905 }
876 } 906 }
877 907
878 compiler.addToRegister(_savePositionRegister, _bodyLength); 908 compiler.addToRegister(
909 _savePositionRegister, compiler.direction * _bodyLength);
879 compiler.copyRegister(CURRENT_POSITION, _savePositionRegister); 910 compiler.copyRegister(CURRENT_POSITION, _savePositionRegister);
880 911
881 // The continuation of the regexp failed. We backtrack the greedy 912 // The continuation of the regexp failed. We backtrack the greedy
882 // quantifier by one step and retry. 913 // quantifier by one step and retry.
883 compiler.bind(continuationFailed); 914 compiler.bind(continuationFailed);
884 compiler.addToRegister(CURRENT_POSITION, -_bodyLength); 915 compiler.addToRegister(
916 CURRENT_POSITION, compiler.direction * -_bodyLength);
885 // If we got back to where the quantifier started matching, then jump 917 // If we got back to where the quantifier started matching, then jump
886 // to the continuation (we haven't pushed a backtrack, so if that fails, it 918 // to the continuation (we haven't pushed a backtrack, so if that fails, it
887 // will backtrack further). 919 // will backtrack further).
888 // We don't have gotoIfEqual, so use gotoIfGreaterEqual. 920 // We don't have gotoIfEqual, so use gotoIfGreaterEqual.
889 compiler.gotoIfGreaterEqual( 921 if (compiler.stepBackwards) {
890 _optimizedGreedyRegister, CURRENT_POSITION, onSuccess); 922 compiler.gotoIfGreaterEqual(
923 CURRENT_POSITION, _optimizedGreedyRegister, onSuccess);
924 } else {
925 compiler.gotoIfGreaterEqual(
926 _optimizedGreedyRegister, CURRENT_POSITION, onSuccess);
927 }
891 compiler.pushBacktrack(continuationFailed); 928 compiler.pushBacktrack(continuationFailed);
892 compiler.goto(onSuccess); 929 compiler.goto(onSuccess);
893 } 930 }
894 931
895 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { 932 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
896 // We optimize loops of the form .* to avoid big backtrack stacks. 933 // We optimize loops of the form .* to avoid big backtrack stacks.
897 if (_isOptimizedGreedy) { 934 if (_isOptimizedGreedy) {
898 generateFixedLengthGreedy(compiler, onSuccess); 935 generateFixedLengthGreedy(compiler, onSuccess);
899 return; 936 return;
900 } 937 }
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
995 _subtreeRegistersThatNeedSaving.isNotEmpty; 1032 _subtreeRegistersThatNeedSaving.isNotEmpty;
996 } 1033 }
997 } 1034 }
998 1035
999 class Atom extends MiniExpAst { 1036 class Atom extends MiniExpAst {
1000 final int _constantIndex; 1037 final int _constantIndex;
1001 1038
1002 Atom(this._constantIndex); 1039 Atom(this._constantIndex);
1003 1040
1004 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { 1041 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
1005 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); 1042 if (compiler.stepBackwards) {
1043 compiler.backtrackIfEqual(CURRENT_POSITION, 0);
1044 } else {
1045 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
1046 }
1006 MiniExpLabel match; 1047 MiniExpLabel match;
1007 int charCode = compiler.constantPoolEntry(_constantIndex); 1048 int charCode = compiler.constantPoolEntry(_constantIndex);
1008 if (!compiler.caseSensitive) { 1049 if (!compiler.caseSensitive) {
1009 List<int> equivalents = internalRegExpEquivalenceClass(charCode); 1050 List<int> equivalents = internalRegExpEquivalenceClass(charCode);
1010 if (equivalents != null && equivalents.length > 1) { 1051 if (equivalents != null && equivalents.length > 1) {
1011 match = new MiniExpLabel(); 1052 match = new MiniExpLabel();
1012 for (int equivalent in equivalents) { 1053 for (int equivalent in equivalents) {
1013 if (equivalent == charCode) continue; 1054 if (equivalent == charCode) continue;
1014 compiler.gotoIfMatches(equivalent, match); 1055 compiler.gotoIfMatches(equivalent, match);
1015 } 1056 }
1016 } 1057 }
1017 } 1058 }
1018 compiler.backtrackIfNoMatch(_constantIndex); 1059 compiler.backtrackIfNoMatch(_constantIndex);
1019 if (match != null) compiler.bind(match); 1060 if (match != null) compiler.bind(match);
1020 compiler.addToRegister(CURRENT_POSITION, 1); 1061 compiler.addToRegister(CURRENT_POSITION, compiler.direction);
1021 compiler.goto(onSuccess); 1062 compiler.goto(onSuccess);
1022 } 1063 }
1023 1064
1024 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); 1065 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom();
1025 } 1066 }
1026 1067
1027 class CharClass extends MiniExpAst { 1068 class CharClass extends MiniExpAst {
1028 final List<int> _ranges = new List<int>(); 1069 final List<int> _ranges = new List<int>();
1029 final bool _positive; 1070 final bool _positive;
1030 1071
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
1112 } 1153 }
1113 } 1154 }
1114 ranges.add(start); 1155 ranges.add(start);
1115 ranges.add(end); 1156 ranges.add(end);
1116 } 1157 }
1117 // TODO(erikcorry): Sort and merge ranges. 1158 // TODO(erikcorry): Sort and merge ranges.
1118 return ranges; 1159 return ranges;
1119 } 1160 }
1120 1161
1121 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { 1162 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
1122 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); 1163 if (compiler.stepBackwards) {
1164 compiler.backtrackIfEqual(CURRENT_POSITION, 0);
1165 } else {
1166 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
1167 }
1123 List<int> ranges = _ranges; 1168 List<int> ranges = _ranges;
1124 if (!compiler.caseSensitive) { 1169 if (!compiler.caseSensitive) {
1125 ranges = caseInsensitiveRanges(_ranges); 1170 ranges = caseInsensitiveRanges(_ranges);
1126 } 1171 }
1127 MiniExpLabel match = new MiniExpLabel(); 1172 MiniExpLabel match = new MiniExpLabel();
1128 if (_positive) { 1173 if (_positive) {
1129 for (int i = 0; i < ranges.length; i += 2) { 1174 for (int i = 0; i < ranges.length; i += 2) {
1130 compiler.gotoIfInRange(ranges[i], ranges[i + 1], match); 1175 compiler.gotoIfInRange(ranges[i], ranges[i + 1], match);
1131 } 1176 }
1132 compiler.backtrack(); 1177 compiler.backtrack();
1133 compiler.bind(match); 1178 compiler.bind(match);
1134 } else { 1179 } else {
1135 for (int i = 0; i < ranges.length; i += 2) { 1180 for (int i = 0; i < ranges.length; i += 2) {
1136 compiler.backtrackIfInRange(ranges[i], ranges[i + 1]); 1181 compiler.backtrackIfInRange(ranges[i], ranges[i + 1]);
1137 } 1182 }
1138 } 1183 }
1139 compiler.addToRegister(CURRENT_POSITION, 1); 1184 compiler.addToRegister(CURRENT_POSITION, compiler.direction);
1140 compiler.goto(onSuccess); 1185 compiler.goto(onSuccess);
1141 } 1186 }
1142 1187
1143 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); 1188 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom();
1144 } 1189 }
1145 1190
1146 // This class is used for all backslashes followed by numbers. For web 1191 // This class is used for all backslashes followed by numbers. For web
1147 // compatibility, if the number (interpreted as decimal) is smaller than the 1192 // compatibility, if the number (interpreted as decimal) is smaller than the
1148 // number of captures, then it will be interpreted as a back reference. 1193 // number of captures, then it will be interpreted as a back reference.
1149 // Otherwise the number will be interpreted as an octal character code escape. 1194 // Otherwise the number will be interpreted as an octal character code escape.
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1191 if (_startRegister == null) { 1236 if (_startRegister == null) {
1192 _startRegister = compiler.allocateCaptureRegisters(); 1237 _startRegister = compiler.allocateCaptureRegisters();
1193 _endRegister = _startRegister - 1; 1238 _endRegister = _startRegister - 1;
1194 } 1239 }
1195 } 1240 }
1196 1241
1197 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { 1242 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
1198 MiniExpLabel undoStart = new MiniExpLabel(); 1243 MiniExpLabel undoStart = new MiniExpLabel();
1199 MiniExpLabel writeEnd = new MiniExpLabel(); 1244 MiniExpLabel writeEnd = new MiniExpLabel();
1200 MiniExpLabel undoEnd = new MiniExpLabel(); 1245 MiniExpLabel undoEnd = new MiniExpLabel();
1201 compiler.copyRegister(_startRegister, CURRENT_POSITION); 1246 int firstRegister = compiler.stepBackwards ? _endRegister : _startRegister;
1247 int lastRegister = compiler.stepBackwards ? _startRegister : _endRegister;
1248 compiler.copyRegister(firstRegister, CURRENT_POSITION);
1202 compiler.pushBacktrack(undoStart); 1249 compiler.pushBacktrack(undoStart);
1203 1250
1204 compiler.generate(_body, writeEnd); 1251 compiler.generate(_body, writeEnd);
1205 1252
1206 compiler.bind(writeEnd); 1253 compiler.bind(writeEnd);
1207 compiler.copyRegister(_endRegister, CURRENT_POSITION); 1254 compiler.copyRegister(lastRegister, CURRENT_POSITION);
1208 compiler.pushBacktrack(undoEnd); 1255 compiler.pushBacktrack(undoEnd);
1209 compiler.goto(onSuccess); 1256 compiler.goto(onSuccess);
1210 1257
1211 compiler.bind(undoStart); 1258 compiler.bind(undoStart);
1212 compiler.copyRegister(_startRegister, NO_POSITION_REGISTER); 1259 compiler.copyRegister(firstRegister, NO_POSITION_REGISTER);
1213 compiler.backtrack(); 1260 compiler.backtrack();
1214 1261
1215 compiler.bind(undoEnd); 1262 compiler.bind(undoEnd);
1216 compiler.copyRegister(_endRegister, NO_POSITION_REGISTER); 1263 compiler.copyRegister(lastRegister, NO_POSITION_REGISTER);
1217 compiler.backtrack(); 1264 compiler.backtrack();
1218 } 1265 }
1219 1266
1220 MiniExpAnalysis analyze(MiniExpCompiler compiler) { 1267 MiniExpAnalysis analyze(MiniExpCompiler compiler) {
1221 allocateRegisters(compiler); 1268 allocateRegisters(compiler);
1222 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); 1269 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler);
1223 return new MiniExpAnalysis.capture( 1270 return new MiniExpAnalysis.capture(
1224 bodyAnalysis, _startRegister, _endRegister); 1271 bodyAnalysis, _startRegister, _endRegister);
1225 } 1272 }
1226 } 1273 }
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after
1416 notWordBoundary, 1463 notWordBoundary,
1417 wordCharacter, 1464 wordCharacter,
1418 notWordCharacter, 1465 notWordCharacter,
1419 digit, 1466 digit,
1420 notDigit, 1467 notDigit,
1421 whitespace, 1468 whitespace,
1422 notWhitespace, 1469 notWhitespace,
1423 nonCapturing, 1470 nonCapturing,
1424 lookAhead, 1471 lookAhead,
1425 negativeLookAhead, 1472 negativeLookAhead,
1473 lookBehind,
1474 negativeLookBehind,
1426 other 1475 other
1427 } 1476 }
1428 1477
1429 class MiniExpParser { 1478 class MiniExpParser {
1430 // The constant pool is used to look up character data when the regexp is 1479 // The constant pool is used to look up character data when the regexp is
1431 // running. It consists of the regexp source with some characters appended 1480 // running. It consists of the regexp source with some characters appended
1432 // to handle escapes that are not literally present in the regexp input. 1481 // to handle escapes that are not literally present in the regexp input.
1433 final MiniExpCompiler _compiler; 1482 final MiniExpCompiler _compiler;
1434 final String _source; 1483 final String _source;
1435 final bool _isMultiLine; 1484 final bool _isMultiLine;
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
1493 1542
1494 MiniExpAst tryParseAssertion() { 1543 MiniExpAst tryParseAssertion() {
1495 if (acceptToken(Token.hat)) { 1544 if (acceptToken(Token.hat)) {
1496 return _isMultiLine ? new AtBeginningOfLine() : new AtStart(); 1545 return _isMultiLine ? new AtBeginningOfLine() : new AtStart();
1497 } 1546 }
1498 if (acceptToken(Token.dollar)) { 1547 if (acceptToken(Token.dollar)) {
1499 return _isMultiLine ? new AtEndOfLine() : new AtEnd(); 1548 return _isMultiLine ? new AtEndOfLine() : new AtEnd();
1500 } 1549 }
1501 if (acceptToken(Token.wordBoundary)) return new WordBoundary(true); 1550 if (acceptToken(Token.wordBoundary)) return new WordBoundary(true);
1502 if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false); 1551 if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false);
1503 var lookaheadAst; 1552 var lookAst;
1504 if (acceptToken(Token.lookAhead)) { 1553 if (acceptToken(Token.lookAhead)) {
1505 lookaheadAst = new LookAhead(true, parseDisjunction(), _compiler); 1554 lookAst = new LookAhead(true, false, parseDisjunction(), _compiler);
1506 } else if (acceptToken(Token.negativeLookAhead)) { 1555 } else if (acceptToken(Token.negativeLookAhead)) {
1507 lookaheadAst = new LookAhead(false, parseDisjunction(), _compiler); 1556 lookAst = new LookAhead(false, false, parseDisjunction(), _compiler);
1557 } else if (acceptToken(Token.lookBehind)) {
1558 lookAst = new LookAhead(true, true, parseDisjunction(), _compiler);
1559 } else if (acceptToken(Token.negativeLookBehind)) {
1560 lookAst = new LookAhead(false, true, parseDisjunction(), _compiler);
1508 } 1561 }
1509 if (lookaheadAst != null) { 1562 if (lookAst != null) {
1510 expectToken(Token.rParen); 1563 expectToken(Token.rParen);
1511 // The normal syntax does not allow a quantifier here, but the web 1564 // The normal syntax does not allow a quantifier here, but the web
1512 // compatible one does. Slightly nasty hack for compatibility: 1565 // compatible one does. Slightly nasty hack for compatibility:
1513 if (peekToken(Token.quant)) { 1566 if (peekToken(Token.quant)) {
1514 MiniExpAst quant = new Quantifier( 1567 MiniExpAst quant = new Quantifier(
1515 _minimumRepeats, _maximumRepeats, _lastWasGreedy, 1568 _minimumRepeats, _maximumRepeats, _lastWasGreedy,
1516 lookaheadAst, _compiler); 1569 lookAst, _compiler);
1517 expectToken(Token.quant); 1570 expectToken(Token.quant);
1518 return quant; 1571 return quant;
1519 } 1572 }
1520 return lookaheadAst; 1573 return lookAst;
1521 } 1574 }
1522 return null; 1575 return null;
1523 } 1576 }
1524 1577
1525 MiniExpAst parseTerm() { 1578 MiniExpAst parseTerm() {
1526 MiniExpAst ast = tryParseAssertion(); 1579 MiniExpAst ast = tryParseAssertion();
1527 if (ast == null) { 1580 if (ast == null) {
1528 ast = parseAtom(); 1581 ast = parseAtom();
1529 if (peekToken(Token.quant)) { 1582 if (peekToken(Token.quant)) {
1530 MiniExpAst quant = new Quantifier( 1583 MiniExpAst quant = new Quantifier(
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after
1929 if (!_has(_position + 1)) error("unterminated group"); 1982 if (!_has(_position + 1)) error("unterminated group");
1930 if (_at(_position + 1) == CHAR_CODE_QUERY) { 1983 if (_at(_position + 1) == CHAR_CODE_QUERY) {
1931 if (!_has(_position + 2)) error("unterminated group"); 1984 if (!_has(_position + 2)) error("unterminated group");
1932 int parenthesisModifier = _at(_position + 2); 1985 int parenthesisModifier = _at(_position + 2);
1933 if (parenthesisModifier == CHAR_CODE_EQUALS) { 1986 if (parenthesisModifier == CHAR_CODE_EQUALS) {
1934 _lastToken = Token.lookAhead; 1987 _lastToken = Token.lookAhead;
1935 } else if (parenthesisModifier == CHAR_CODE_COLON) { 1988 } else if (parenthesisModifier == CHAR_CODE_COLON) {
1936 _lastToken = Token.nonCapturing; 1989 _lastToken = Token.nonCapturing;
1937 } else if (parenthesisModifier == CHAR_CODE_BANG) { 1990 } else if (parenthesisModifier == CHAR_CODE_BANG) {
1938 _lastToken = Token.negativeLookAhead; 1991 _lastToken = Token.negativeLookAhead;
1992 } else if (parenthesisModifier == CHAR_CODE_LESS_THAN) {
1993 if (!_has(_position + 3)) error("unterminated group");
1994 int lookBehindModifier = _at(_position + 3);
1995 _position++;
1996 if (lookBehindModifier == CHAR_CODE_EQUALS) {
1997 _lastToken = Token.lookBehind;
1998 } else if (lookBehindModifier == CHAR_CODE_BANG) {
1999 _lastToken = Token.negativeLookBehind;
2000 } else {
2001 error("invalid group");
2002 }
1939 } else { 2003 } else {
1940 error("invalid group"); 2004 error("invalid group");
1941 } 2005 }
1942 _position += 2; 2006 _position += 2;
1943 return; 2007 return;
1944 } 2008 }
1945 } 2009 }
1946 2010
1947 void lexQuantifier() { 2011 void lexQuantifier() {
1948 int quantifierCode = _at(_position); 2012 int quantifierCode = _at(_position);
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after
2107 break; 2171 break;
2108 case BACKTRACK_GT: 2172 case BACKTRACK_GT:
2109 int reg1 = _registers[_byteCodes[programCounter++]]; 2173 int reg1 = _registers[_byteCodes[programCounter++]];
2110 int reg2 = _registers[_byteCodes[programCounter++]]; 2174 int reg2 = _registers[_byteCodes[programCounter++]];
2111 if (reg1 > reg2) { 2175 if (reg1 > reg2) {
2112 _registers[CURRENT_POSITION] = stack[--stackPointer]; 2176 _registers[CURRENT_POSITION] = stack[--stackPointer];
2113 programCounter = stack[--stackPointer]; 2177 programCounter = stack[--stackPointer];
2114 } 2178 }
2115 break; 2179 break;
2116 case BACKTRACK_IF_NO_MATCH: 2180 case BACKTRACK_IF_NO_MATCH:
2117 if (subject.codeUnitAt(_registers[CURRENT_POSITION]) != 2181 case BACKTRACK_IF_PREVIOUS_NO_MATCH:
2182 int offset = byteCode - BACKTRACK_IF_NO_MATCH;
2183 if (subject.codeUnitAt(_registers[CURRENT_POSITION] - offset) !=
2118 _constantPool.codeUnitAt(_byteCodes[programCounter++])) { 2184 _constantPool.codeUnitAt(_byteCodes[programCounter++])) {
2119 _registers[CURRENT_POSITION] = stack[--stackPointer]; 2185 _registers[CURRENT_POSITION] = stack[--stackPointer];
2120 programCounter = stack[--stackPointer]; 2186 programCounter = stack[--stackPointer];
2121 } 2187 }
2122 break; 2188 break;
2123 case BACKTRACK_IF_IN_RANGE: 2189 case BACKTRACK_IF_IN_RANGE:
2124 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); 2190 case BACKTRACK_IF_PREVIOUS_IN_RANGE:
2191 int offset = byteCode - BACKTRACK_IF_IN_RANGE;
2192 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
2125 int from = _byteCodes[programCounter++]; 2193 int from = _byteCodes[programCounter++];
2126 int to = _byteCodes[programCounter++]; 2194 int to = _byteCodes[programCounter++];
2127 if (from <= code && code <= to) { 2195 if (from <= code && code <= to) {
2128 _registers[CURRENT_POSITION] = stack[--stackPointer]; 2196 _registers[CURRENT_POSITION] = stack[--stackPointer];
2129 programCounter = stack[--stackPointer]; 2197 programCounter = stack[--stackPointer];
2130 } 2198 }
2131 break; 2199 break;
2132 case GOTO_IF_MATCH: 2200 case GOTO_IF_MATCH:
2133 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); 2201 case GOTO_IF_PREVIOUS_MATCH:
2202 int offset = byteCode - GOTO_IF_MATCH;
2203 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
2134 int expected = _byteCodes[programCounter++]; 2204 int expected = _byteCodes[programCounter++];
2135 int dest = _byteCodes[programCounter++]; 2205 int dest = _byteCodes[programCounter++];
2136 if (code == expected) programCounter = dest; 2206 if (code == expected) programCounter = dest;
2137 break; 2207 break;
2138 case GOTO_IF_IN_RANGE: 2208 case GOTO_IF_IN_RANGE:
2139 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); 2209 case GOTO_IF_PREVIOUS_IN_RANGE:
2210 int offset = byteCode - GOTO_IF_IN_RANGE;
2211 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
2140 int from = _byteCodes[programCounter++]; 2212 int from = _byteCodes[programCounter++];
2141 int to = _byteCodes[programCounter++]; 2213 int to = _byteCodes[programCounter++];
2142 int dest = _byteCodes[programCounter++]; 2214 int dest = _byteCodes[programCounter++];
2143 if (from <= code && code <= to) programCounter = dest; 2215 if (from <= code && code <= to) programCounter = dest;
2144 break; 2216 break;
2145 case GOTO_EQ: 2217 case GOTO_EQ:
2146 int reg1 = _registers[_byteCodes[programCounter++]]; 2218 int reg1 = _registers[_byteCodes[programCounter++]];
2147 int reg2 = _registers[_byteCodes[programCounter++]]; 2219 int reg2 = _registers[_byteCodes[programCounter++]];
2148 int dest = _byteCodes[programCounter++]; 2220 int dest = _byteCodes[programCounter++];
2149 if (reg1 == reg2) programCounter = dest; 2221 if (reg1 == reg2) programCounter = dest;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
2181 case COPY_REGISTER: 2253 case COPY_REGISTER:
2182 // We don't normally keep the stack pointer in sync with its slot in 2254 // We don't normally keep the stack pointer in sync with its slot in
2183 // the _registers, but we have to have it in sync here. 2255 // the _registers, but we have to have it in sync here.
2184 _registers[STACK_POINTER] = stackPointer; 2256 _registers[STACK_POINTER] = stackPointer;
2185 int registerIndex = _byteCodes[programCounter++]; 2257 int registerIndex = _byteCodes[programCounter++];
2186 int value = _registers[_byteCodes[programCounter++]]; 2258 int value = _registers[_byteCodes[programCounter++]];
2187 _registers[registerIndex] = value; 2259 _registers[registerIndex] = value;
2188 stackPointer = _registers[STACK_POINTER]; 2260 stackPointer = _registers[STACK_POINTER];
2189 break; 2261 break;
2190 case BACKTRACK_ON_BACK_REFERENCE: 2262 case BACKTRACK_ON_BACK_REFERENCE:
2263 case BACKTRACK_ON_BACK_REFERENCE_BACKWARDS:
2191 int registerIndex = _byteCodes[programCounter++]; 2264 int registerIndex = _byteCodes[programCounter++];
2192 bool case_sensitive = _byteCodes[programCounter++] != 0; 2265 bool case_sensitive = _byteCodes[programCounter++] != 0;
2193 if (!checkBackReference(subject, case_sensitive, registerIndex)) { 2266 if (!checkBackReference(
2267 subject, case_sensitive, registerIndex,
2268 byteCode == BACKTRACK_ON_BACK_REFERENCE_BACKWARDS)) {
2194 // Backtrack. 2269 // Backtrack.
2195 _registers[CURRENT_POSITION] = stack[--stackPointer]; 2270 _registers[CURRENT_POSITION] = stack[--stackPointer];
2196 programCounter = stack[--stackPointer]; 2271 programCounter = stack[--stackPointer];
2197 } 2272 }
2198 break; 2273 break;
2199 case BACKTRACK: 2274 case BACKTRACK:
2200 _registers[CURRENT_POSITION] = stack[--stackPointer]; 2275 _registers[CURRENT_POSITION] = stack[--stackPointer];
2201 programCounter = stack[--stackPointer]; 2276 programCounter = stack[--stackPointer];
2202 break; 2277 break;
2203 case SUCCEED: 2278 case SUCCEED:
2204 return true; 2279 return true;
2205 case FAIL: 2280 case FAIL:
2206 return false; 2281 return false;
2207 default: 2282 default:
2208 assert(false); 2283 assert(false);
2209 break; 2284 break;
2210 } 2285 }
2211 } 2286 }
2212 } 2287 }
2213 2288
2214 bool checkBackReference( 2289 bool checkBackReference(
2215 String subject, bool caseSensitive, int registerIndex) { 2290 String subject, bool caseSensitive, int registerIndex, bool backwards) {
2216 int start = _registers[registerIndex]; 2291 int start = _registers[registerIndex];
2217 int end = _registers[registerIndex + 1]; 2292 int end = _registers[registerIndex + 1];
2218 if (end == NO_POSITION) return true; 2293 if (end == NO_POSITION) return true;
2219 int length = end - start; 2294 int length = end - start;
2220 int currentPosition = _registers[CURRENT_POSITION]; 2295 int currentPosition = _registers[CURRENT_POSITION];
2221 if (currentPosition + end - start > subject.length) return false; 2296 if (backwards) {
2297 if (currentPosition < length) return false;
2298 currentPosition -= length;
2299 } else {
2300 if (currentPosition + length > subject.length) return false;
2301 }
2222 for (int i = 0; i < length; i++) { 2302 for (int i = 0; i < length; i++) {
2223 int x = subject.codeUnitAt(start + i); 2303 int x = subject.codeUnitAt(start + i);
2224 int y = subject.codeUnitAt(currentPosition + i); 2304 int y = subject.codeUnitAt(currentPosition + i);
2225 if (!caseSensitive) { 2305 if (!caseSensitive) {
2226 x = internalRegExpCanonicalize(x); 2306 x = internalRegExpCanonicalize(x);
2227 y = internalRegExpCanonicalize(y); 2307 y = internalRegExpCanonicalize(y);
2228 } 2308 }
2229 if (x != y) return false; 2309 if (x != y) return false;
2230 } 2310 }
2231 _registers[CURRENT_POSITION] += length; 2311 _registers[CURRENT_POSITION] += backwards ? -length : length;
2232 return true; 2312 return true;
2233 } 2313 }
2234 } 2314 }
OLDNEW
« no previous file with comments | « no previous file | tests/unsorted/lookbehind_test.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698