OLD | NEW |
1 // Copyright (c) 2015, the Fletch project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Fletch project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE.md file. | 3 // BSD-style license that can be found in the LICENSE.md file. |
4 | 4 |
5 part of dart.core_patch; | 5 part of dart.core_patch; |
6 | 6 |
7 class MiniExpLabel { | 7 class MiniExpLabel { |
8 // Initially points to -1 to indicate the label is neither linked (used) nor | 8 // Initially points to -1 to indicate the label is neither linked (used) nor |
9 // bound (fixed to a location). When the label is linked, but not bound, it | 9 // bound (fixed to a location). When the label is linked, but not bound, it |
10 // has a negative value, determined by fixupLocation(l), that indicates the | 10 // has a negative value, determined by fixupLocation(l), that indicates the |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
90 | 90 |
91 // Byte codes. | 91 // Byte codes. |
92 const GOTO = 0; // label | 92 const GOTO = 0; // label |
93 const PUSH_REGISTER = 1; // reg | 93 const PUSH_REGISTER = 1; // reg |
94 const PUSH_BACKTRACK = 2; // const | 94 const PUSH_BACKTRACK = 2; // const |
95 const POP_REGISTER = 3; // reg | 95 const POP_REGISTER = 3; // reg |
96 const BACKTRACK_EQ = 4; // reg reg | 96 const BACKTRACK_EQ = 4; // reg reg |
97 const BACKTRACK_NE = 5; // reg reg | 97 const BACKTRACK_NE = 5; // reg reg |
98 const BACKTRACK_GT = 6; // reg reg | 98 const BACKTRACK_GT = 6; // reg reg |
99 const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset | 99 const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset |
100 const BACKTRACK_IF_IN_RANGE = 8; // from to | 100 const BACKTRACK_IF_PREVIOUS_NO_MATCH = 8; // constant-pool-offset |
101 const GOTO_IF_MATCH = 9; // charCode label | 101 const BACKTRACK_IF_IN_RANGE = 9; // from to |
102 const GOTO_IF_IN_RANGE = 10; // from to label | 102 const BACKTRACK_IF_PREVIOUS_IN_RANGE = 10; // from to |
103 const GOTO_EQ = 11; // reg reg label | 103 const GOTO_IF_MATCH = 11; // charCode label |
104 const GOTO_GE = 12; // reg reg label | 104 const GOTO_IF_PREVIOUS_MATCH = 12; // charCode label |
105 const GOTO_IF_WORD_CHARACTER = 13; // position-offset label | 105 const GOTO_IF_IN_RANGE = 13; // from to label |
106 const ADD_TO_REGISTER = 14; // reg const | 106 const GOTO_IF_PREVIOUS_IN_RANGE = 14; // from to label |
107 const COPY_REGISTER = 15; // dest-reg source-reg | 107 const GOTO_EQ = 15; // reg reg label |
108 const BACKTRACK_ON_BACK_REFERENCE = 16; // capture-reg | 108 const GOTO_GE = 16; // reg reg label |
109 const BACKTRACK = 17; | 109 const GOTO_IF_WORD_CHARACTER = 17; // position-offset label |
110 const SUCCEED = 18; | 110 const ADD_TO_REGISTER = 18; // reg const |
111 const FAIL = 19; | 111 const COPY_REGISTER = 19; // dest-reg source-reg |
| 112 const BACKTRACK_ON_BACK_REFERENCE = 20; // capture-reg |
| 113 const BACKTRACK_ON_BACK_REFERENCE_BACKWARDS = 21; // capture-reg |
| 114 const BACKTRACK = 22; |
| 115 const SUCCEED = 23; |
| 116 const FAIL = 24; |
112 | 117 |
113 // Format is name, number of register arguments, number of other arguments. | 118 // Format is name, number of register arguments, number of other arguments. |
114 const BYTE_CODE_NAMES = const [ | 119 const BYTE_CODE_NAMES = const [ |
115 "GOTO", 0, 1, | 120 "GOTO", 0, 1, |
116 "PUSH_REGISTER", 1, 0, | 121 "PUSH_REGISTER", 1, 0, |
117 "PUSH_BACKTRACK", 0, 1, | 122 "PUSH_BACKTRACK", 0, 1, |
118 "POP_REGISTER", 1, 0, | 123 "POP_REGISTER", 1, 0, |
119 "BACKTRACK_EQ", 2, 0, | 124 "BACKTRACK_EQ", 2, 0, |
120 "BACKTRACK_NE", 2, 0, | 125 "BACKTRACK_NE", 2, 0, |
121 "BACKTRACK_GT", 2, 0, | 126 "BACKTRACK_GT", 2, 0, |
122 "BACKTRACK_IF_NO_MATCH", 0, 1, | 127 "BACKTRACK_IF_NO_MATCH", 0, 1, |
| 128 "BACKTRACK_IF_PREVIOUS_NO_MATCH", 0, 1, |
123 "BACKTRACK_IF_IN_RANGE", 0, 2, | 129 "BACKTRACK_IF_IN_RANGE", 0, 2, |
| 130 "BACKTRACK_IF_PREVIOUS_IN_RANGE", 0, 2, |
124 "GOTO_IF_MATCH", 0, 2, | 131 "GOTO_IF_MATCH", 0, 2, |
| 132 "GOTO_IF_PREVIOUS_MATCH", 0, 2, |
125 "GOTO_IF_IN_RANGE", 0, 3, | 133 "GOTO_IF_IN_RANGE", 0, 3, |
| 134 "GOTO_IF_PREVIOUS_IN_RANGE", 0, 3, |
126 "GOTO_EQ", 2, 1, | 135 "GOTO_EQ", 2, 1, |
127 "GOTO_GE", 2, 1, | 136 "GOTO_GE", 2, 1, |
128 "GOTO_IF_WORD_CHARACTER", 0, 2, | 137 "GOTO_IF_WORD_CHARACTER", 0, 2, |
129 "ADD_TO_REGISTER", 1, 1, | 138 "ADD_TO_REGISTER", 1, 1, |
130 "COPY_REGISTER", 2, 0, | 139 "COPY_REGISTER", 2, 0, |
131 "BACKTRACK_ON_BACK_REFERENCE", 1, 1, | 140 "BACKTRACK_ON_BACK_REFERENCE", 1, 1, |
| 141 "BACKTRACK_ON_BACK_REFERENCE_BACKWARDS", 1, 1, |
132 "BACKTRACK", 0, 0, | 142 "BACKTRACK", 0, 0, |
133 "SUCCEED", 0, 0, | 143 "SUCCEED", 0, 0, |
134 "FAIL", 0, 0]; | 144 "FAIL", 0, 0]; |
135 | 145 |
136 const CHAR_CODE_NUL = 0; | 146 const CHAR_CODE_NUL = 0; |
137 const CHAR_CODE_BACKSPACE = 8; | 147 const CHAR_CODE_BACKSPACE = 8; |
138 const CHAR_CODE_TAB = 9; | 148 const CHAR_CODE_TAB = 9; |
139 const CHAR_CODE_NEWLINE = 10; | 149 const CHAR_CODE_NEWLINE = 10; |
140 const CHAR_CODE_VERTICAL_TAB = 11; | 150 const CHAR_CODE_VERTICAL_TAB = 11; |
141 const CHAR_CODE_FORM_FEED = 12; | 151 const CHAR_CODE_FORM_FEED = 12; |
142 const CHAR_CODE_CARRIAGE_RETURN = 13; | 152 const CHAR_CODE_CARRIAGE_RETURN = 13; |
143 const CHAR_CODE_SPACE = 32; | 153 const CHAR_CODE_SPACE = 32; |
144 const CHAR_CODE_BANG = 33; | 154 const CHAR_CODE_BANG = 33; |
145 const CHAR_CODE_ASTERISK = 42; | 155 const CHAR_CODE_ASTERISK = 42; |
146 const CHAR_CODE_PLUS = 43; | 156 const CHAR_CODE_PLUS = 43; |
147 const CHAR_CODE_COMMA = 44; | 157 const CHAR_CODE_COMMA = 44; |
148 const CHAR_CODE_DASH = 45; | 158 const CHAR_CODE_DASH = 45; |
149 const CHAR_CODE_0 = 48; | 159 const CHAR_CODE_0 = 48; |
150 const CHAR_CODE_7 = 55; | 160 const CHAR_CODE_7 = 55; |
151 const CHAR_CODE_9 = 57; | 161 const CHAR_CODE_9 = 57; |
152 const CHAR_CODE_COLON = 58; | 162 const CHAR_CODE_COLON = 58; |
| 163 const CHAR_CODE_LESS_THAN = 60; |
153 const CHAR_CODE_EQUALS = 61; | 164 const CHAR_CODE_EQUALS = 61; |
154 const CHAR_CODE_QUERY = 63; | 165 const CHAR_CODE_QUERY = 63; |
155 const CHAR_CODE_UPPER_A = 65; | 166 const CHAR_CODE_UPPER_A = 65; |
156 const CHAR_CODE_UPPER_B = 66; | 167 const CHAR_CODE_UPPER_B = 66; |
157 const CHAR_CODE_UPPER_D = 68; | 168 const CHAR_CODE_UPPER_D = 68; |
158 const CHAR_CODE_UPPER_F = 70; | 169 const CHAR_CODE_UPPER_F = 70; |
159 const CHAR_CODE_UPPER_S = 83; | 170 const CHAR_CODE_UPPER_S = 83; |
160 const CHAR_CODE_UPPER_W = 87; | 171 const CHAR_CODE_UPPER_W = 87; |
161 const CHAR_CODE_UPPER_Z = 90; | 172 const CHAR_CODE_UPPER_Z = 90; |
162 const CHAR_CODE_BACKSLASH = 92; | 173 const CHAR_CODE_BACKSLASH = 92; |
(...skipping 30 matching lines...) Expand all Loading... |
193 class MiniExpCompiler { | 204 class MiniExpCompiler { |
194 final String pattern; | 205 final String pattern; |
195 final bool caseSensitive; | 206 final bool caseSensitive; |
196 final List<int> registers = new List<int>(); | 207 final List<int> registers = new List<int>(); |
197 int captureRegisterCount = 0; | 208 int captureRegisterCount = 0; |
198 int firstCaptureRegister; | 209 int firstCaptureRegister; |
199 final List<int> _codes = new List<int>(); | 210 final List<int> _codes = new List<int>(); |
200 final List<int> _extraConstants = new List<int>(); | 211 final List<int> _extraConstants = new List<int>(); |
201 final List<BackReference> _backReferences = new List<BackReference>(); | 212 final List<BackReference> _backReferences = new List<BackReference>(); |
202 MiniExpLabel _pendingGoto; | 213 MiniExpLabel _pendingGoto; |
| 214 bool stepBackwards = false; |
| 215 int get oneIfBackwards => stepBackwards ? 1 : 0; |
| 216 int get direction => stepBackwards ? -1 : 1; |
203 | 217 |
204 MiniExpCompiler(this.pattern, this.caseSensitive) { | 218 MiniExpCompiler(this.pattern, this.caseSensitive) { |
205 for (int i = 0; i < FIXED_REGISTERS; i++) { | 219 for (int i = 0; i < FIXED_REGISTERS; i++) { |
206 registers.add(i == NO_POSITION_REGISTER ? NO_POSITION : 0); | 220 registers.add(i == NO_POSITION_REGISTER ? NO_POSITION : 0); |
207 } | 221 } |
208 } | 222 } |
209 | 223 |
210 List<int> get codes { | 224 List<int> get codes { |
211 flushPendingGoto(); | 225 flushPendingGoto(); |
| 226 //disassemble(_codes); |
212 return _codes; | 227 return _codes; |
213 } | 228 } |
214 | 229 |
215 String get constantPool { | 230 String get constantPool { |
216 if (_extraConstants.isEmpty) { | 231 if (_extraConstants.isEmpty) { |
217 return pattern; | 232 return pattern; |
218 } else { | 233 } else { |
219 return pattern + new String.fromCharCodes(_extraConstants); | 234 return pattern + new String.fromCharCodes(_extraConstants); |
220 } | 235 } |
221 } | 236 } |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
372 void addToRegister(int reg, int offset) { | 387 void addToRegister(int reg, int offset) { |
373 _emit(ADD_TO_REGISTER, registerNumber(reg), offset); | 388 _emit(ADD_TO_REGISTER, registerNumber(reg), offset); |
374 } | 389 } |
375 | 390 |
376 void copyRegister(int destRegister, int sourceRegister) { | 391 void copyRegister(int destRegister, int sourceRegister) { |
377 _emit(COPY_REGISTER, registerNumber(destRegister), | 392 _emit(COPY_REGISTER, registerNumber(destRegister), |
378 registerNumber(sourceRegister)); | 393 registerNumber(sourceRegister)); |
379 } | 394 } |
380 | 395 |
381 void backtrackOnBackReferenceFail(int register, bool caseSensitive) { | 396 void backtrackOnBackReferenceFail(int register, bool caseSensitive) { |
382 _emit(BACKTRACK_ON_BACK_REFERENCE, | 397 _emit(BACKTRACK_ON_BACK_REFERENCE + oneIfBackwards, |
383 registerNumber(register), caseSensitive ? 1 : 0); | 398 registerNumber(register), caseSensitive ? 1 : 0); |
384 } | 399 } |
385 | 400 |
386 void backtrackIfGreater(int register1, int register2) { | 401 void backtrackIfGreater(int register1, int register2) { |
387 _emit(BACKTRACK_GT, registerNumber(register1), registerNumber(register2)); | 402 _emit(BACKTRACK_GT, registerNumber(register1), registerNumber(register2)); |
388 } | 403 } |
389 | 404 |
390 void gotoIfGreaterEqual(int register1, int register2, MiniExpLabel label) { | 405 void gotoIfGreaterEqual(int register1, int register2, MiniExpLabel label) { |
391 _emit(GOTO_GE, registerNumber(register1), registerNumber(register2)); | 406 _emit(GOTO_GE, registerNumber(register1), registerNumber(register2)); |
392 link(label); | 407 link(label); |
393 } | 408 } |
394 | 409 |
395 void backtrackIfNoMatch(int constant_pool_offset) { | 410 void backtrackIfNoMatch(int constant_pool_offset) { |
396 _emit(BACKTRACK_IF_NO_MATCH, constant_pool_offset); | 411 _emit(BACKTRACK_IF_NO_MATCH + oneIfBackwards, constant_pool_offset); |
397 } | 412 } |
398 | 413 |
399 void backtrackIfInRange(int from, int to) { | 414 void backtrackIfInRange(int from, int to) { |
400 _emit(BACKTRACK_IF_IN_RANGE, from, to); | 415 _emit(BACKTRACK_IF_IN_RANGE + oneIfBackwards, from, to); |
401 } | 416 } |
402 | 417 |
403 void gotoIfMatches(int charCode, MiniExpLabel label) { | 418 void gotoIfMatches(int charCode, MiniExpLabel label) { |
404 _emit(GOTO_IF_MATCH, charCode); | 419 _emit(GOTO_IF_MATCH + oneIfBackwards, charCode); |
405 link(label); | 420 link(label); |
406 } | 421 } |
407 | 422 |
408 void gotoIfInRange(int from, int to, MiniExpLabel label) { | 423 void gotoIfInRange(int from, int to, MiniExpLabel label) { |
409 if (from == to) { | 424 if (from == to) { |
410 gotoIfMatches(from, label); | 425 gotoIfMatches(from, label); |
411 } else { | 426 } else { |
412 _emit(GOTO_IF_IN_RANGE, from, to); | 427 _emit(GOTO_IF_IN_RANGE + oneIfBackwards, from, to); |
413 link(label); | 428 link(label); |
414 } | 429 } |
415 } | 430 } |
416 | 431 |
417 void backtrackIfNotAtWordBoundary() { | 432 void backtrackIfNotAtWordBoundary() { |
418 MiniExpLabel non_word_on_left = new MiniExpLabel(); | 433 MiniExpLabel non_word_on_left = new MiniExpLabel(); |
419 MiniExpLabel word_on_left = new MiniExpLabel(); | 434 MiniExpLabel word_on_left = new MiniExpLabel(); |
420 MiniExpLabel at_word_boundary = new MiniExpLabel(); | 435 MiniExpLabel at_word_boundary = new MiniExpLabel(); |
421 MiniExpLabel do_backtrack = new MiniExpLabel(); | 436 MiniExpLabel do_backtrack = new MiniExpLabel(); |
422 | 437 |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
503 fixedLength = 0, | 518 fixedLength = 0, |
504 anchored = false, | 519 anchored = false, |
505 registersToSave = null; | 520 registersToSave = null; |
506 | 521 |
507 const MiniExpAnalysis.atStart() | 522 const MiniExpAnalysis.atStart() |
508 : canMatchEmpty = true, | 523 : canMatchEmpty = true, |
509 fixedLength = 0, | 524 fixedLength = 0, |
510 anchored = true, | 525 anchored = true, |
511 registersToSave = null; | 526 registersToSave = null; |
512 | 527 |
513 MiniExpAnalysis.lookahead(MiniExpAnalysis bodyAnalysis, bool positive) | 528 MiniExpAnalysis.lookahead( |
| 529 MiniExpAnalysis bodyAnalysis, bool positive, bool behind) |
514 : canMatchEmpty = true, | 530 : canMatchEmpty = true, |
515 fixedLength = 0, | 531 fixedLength = 0, |
516 anchored = positive && bodyAnalysis.anchored, | 532 anchored = positive && !behind && bodyAnalysis.anchored, |
517 registersToSave = bodyAnalysis.registersToSave; | 533 registersToSave = bodyAnalysis.registersToSave; |
518 | 534 |
519 MiniExpAnalysis.quantifier( | 535 MiniExpAnalysis.quantifier( |
520 MiniExpAnalysis bodyAnalysis, int min, int max, List<int> regs) | 536 MiniExpAnalysis bodyAnalysis, int min, int max, List<int> regs) |
521 : canMatchEmpty = min == 0 || bodyAnalysis.canMatchEmpty, | 537 : canMatchEmpty = min == 0 || bodyAnalysis.canMatchEmpty, |
522 fixedLength = (min == 1 && max == 1) ? bodyAnalysis.fixedLength : null, | 538 fixedLength = (min == 1 && max == 1) ? bodyAnalysis.fixedLength : null, |
523 anchored = min > 0 && bodyAnalysis.anchored, | 539 anchored = min > 0 && bodyAnalysis.anchored, |
524 registersToSave = combineRegisters(bodyAnalysis.registersToSave, regs); | 540 registersToSave = combineRegisters(bodyAnalysis.registersToSave, regs); |
525 | 541 |
526 const MiniExpAnalysis.atom() | 542 const MiniExpAnalysis.atom() |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
594 } | 610 } |
595 } | 611 } |
596 | 612 |
597 class Alternative extends MiniExpAst { | 613 class Alternative extends MiniExpAst { |
598 final MiniExpAst _left; | 614 final MiniExpAst _left; |
599 final MiniExpAst _right; | 615 final MiniExpAst _right; |
600 | 616 |
601 Alternative(this._left, this._right); | 617 Alternative(this._left, this._right); |
602 | 618 |
603 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 619 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
604 compiler.generate(_left, _right.label); | 620 MiniExpAst first = compiler.stepBackwards ? _right : _left; |
605 compiler.generate(_right, onSuccess); | 621 MiniExpAst second = compiler.stepBackwards ? _left : _right; |
| 622 |
| 623 compiler.generate(first, second.label); |
| 624 compiler.generate(second, onSuccess); |
606 } | 625 } |
607 | 626 |
608 MiniExpAnalysis analyze(MiniExpCompiler compiler) { | 627 MiniExpAnalysis analyze(MiniExpCompiler compiler) { |
609 return new MiniExpAnalysis.and( | 628 return new MiniExpAnalysis.and( |
610 _left.analyze(compiler), _right.analyze(compiler)); | 629 _left.analyze(compiler), _right.analyze(compiler)); |
611 } | 630 } |
612 } | 631 } |
613 | 632 |
614 abstract class Assertion extends MiniExpAst { | 633 abstract class Assertion extends MiniExpAst { |
615 MiniExpAnalysis analyze(MiniExpCompiler) => const MiniExpAnalysis.empty(); | 634 MiniExpAnalysis analyze(MiniExpCompiler) => const MiniExpAnalysis.empty(); |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
683 compiler.pop(CURRENT_POSITION); | 702 compiler.pop(CURRENT_POSITION); |
684 compiler.pop(CURRENT_POSITION); | 703 compiler.pop(CURRENT_POSITION); |
685 // This overwrites the current position with the correct value. | 704 // This overwrites the current position with the correct value. |
686 compiler.backtrack(); | 705 compiler.backtrack(); |
687 } | 706 } |
688 } | 707 } |
689 } | 708 } |
690 | 709 |
691 class LookAhead extends Assertion { | 710 class LookAhead extends Assertion { |
692 final bool _positive; | 711 final bool _positive; |
| 712 final bool _behind; |
693 final MiniExpAst _body; | 713 final MiniExpAst _body; |
694 List<int> _subtreeRegisters; | 714 List<int> _subtreeRegisters; |
695 | 715 |
696 int _savedStackPointerRegister; | 716 int _savedStackPointerRegister; |
697 int _savedPosition; | 717 int _savedPosition; |
698 | 718 |
699 LookAhead(this._positive, this._body, MiniExpCompiler compiler) { | 719 LookAhead( |
| 720 this._positive, this._behind, this._body, MiniExpCompiler compiler) { |
700 _savedStackPointerRegister = compiler.allocateWorkingRegister(); | 721 _savedStackPointerRegister = compiler.allocateWorkingRegister(); |
701 _savedPosition = compiler.allocateWorkingRegister(); | 722 _savedPosition = compiler.allocateWorkingRegister(); |
702 } | 723 } |
703 | 724 |
704 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 725 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
705 // Lookahead. Even if the subexpression succeeds, the current position is | 726 // Lookahead. Even if the subexpression succeeds, the current position is |
706 // reset, and the backtracking stack is unwound so that we can never | 727 // reset, and the backtracking stack is unwound so that we can never |
707 // backtrack into the lookahead. On a failure of the subexpression, the | 728 // backtrack into the lookahead. On a failure of the subexpression, the |
708 // stack will be naturally unwound. | 729 // stack will be naturally unwound. |
709 MiniExpLabel body_succeeded = new MiniExpLabel(); | 730 MiniExpLabel body_succeeded = new MiniExpLabel(); |
710 MiniExpLabel succeed_on_failure = new MiniExpLabel(); | 731 MiniExpLabel succeed_on_failure = new MiniExpLabel(); |
711 MiniExpLabel undoCaptures; | 732 MiniExpLabel undoCaptures; |
712 compiler.copyRegister(_savedStackPointerRegister, STACK_POINTER); | 733 compiler.copyRegister(_savedStackPointerRegister, STACK_POINTER); |
713 compiler.copyRegister(_savedPosition, CURRENT_POSITION); | 734 compiler.copyRegister(_savedPosition, CURRENT_POSITION); |
714 if (!_positive) { | 735 if (!_positive) { |
715 compiler.pushBacktrack(succeed_on_failure); | 736 compiler.pushBacktrack(succeed_on_failure); |
716 } | 737 } |
| 738 bool stepBackwards = compiler.stepBackwards; |
| 739 compiler.stepBackwards = _behind; |
717 compiler.generate(_body, body_succeeded); | 740 compiler.generate(_body, body_succeeded); |
| 741 compiler.stepBackwards = stepBackwards; |
718 | 742 |
719 compiler.bind(body_succeeded); | 743 compiler.bind(body_succeeded); |
720 compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister); | 744 compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister); |
721 compiler.copyRegister(CURRENT_POSITION, _savedPosition); | 745 compiler.copyRegister(CURRENT_POSITION, _savedPosition); |
722 if (!_positive) { | 746 if (!_positive) { |
723 // For negative lookahead always zap the captures when the body succeeds | 747 // For negative lookahead always zap the captures when the body succeeds |
724 // and the lookahead thus fails. The captures are only needed for any | 748 // and the lookahead thus fails. The captures are only needed for any |
725 // backrefs inside the negative lookahead. | 749 // backrefs inside the negative lookahead. |
726 if (_subtreeRegisters != null) { | 750 if (_subtreeRegisters != null) { |
727 for (int register in _subtreeRegisters) { | 751 for (int register in _subtreeRegisters) { |
(...skipping 20 matching lines...) Expand all Loading... |
748 for (int register in _subtreeRegisters) { | 772 for (int register in _subtreeRegisters) { |
749 compiler.copyRegister(register, NO_POSITION_REGISTER); | 773 compiler.copyRegister(register, NO_POSITION_REGISTER); |
750 } | 774 } |
751 compiler.backtrack(); | 775 compiler.backtrack(); |
752 } | 776 } |
753 } | 777 } |
754 | 778 |
755 MiniExpAnalysis analyze(compiler) { | 779 MiniExpAnalysis analyze(compiler) { |
756 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); | 780 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); |
757 _subtreeRegisters = bodyAnalysis.registersToSave; | 781 _subtreeRegisters = bodyAnalysis.registersToSave; |
758 return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive); | 782 return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive, _behind); |
759 } | 783 } |
760 } | 784 } |
761 | 785 |
762 class Quantifier extends MiniExpAst { | 786 class Quantifier extends MiniExpAst { |
763 final int _min; | 787 final int _min; |
764 final int _max; | 788 final int _max; |
765 final bool _greedy; | 789 final bool _greedy; |
766 final MiniExpAst _body; | 790 final MiniExpAst _body; |
767 int _counterRegister; | 791 int _counterRegister; |
768 int _startOfMatchRegister; // Implements 21.2.2.5.1 note 4. | 792 int _startOfMatchRegister; // Implements 21.2.2.5.1 note 4. |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
841 void generateFixedLengthGreedy( | 865 void generateFixedLengthGreedy( |
842 MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 866 MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
843 MiniExpLabel newIteration = new MiniExpLabel(); | 867 MiniExpLabel newIteration = new MiniExpLabel(); |
844 MiniExpLabel cantAdvanceMore = new MiniExpLabel(); | 868 MiniExpLabel cantAdvanceMore = new MiniExpLabel(); |
845 MiniExpLabel continuationFailed = new MiniExpLabel(); | 869 MiniExpLabel continuationFailed = new MiniExpLabel(); |
846 | 870 |
847 // Save the current position, so we know when the quantifier has been | 871 // Save the current position, so we know when the quantifier has been |
848 // unwound enough. | 872 // unwound enough. |
849 compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION); | 873 compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION); |
850 if (_min != 0) { | 874 if (_min != 0) { |
851 compiler.addToRegister(_optimizedGreedyRegister, _min * _bodyLength); | 875 int offset = compiler.direction * _bodyLength; |
| 876 compiler.addToRegister(_optimizedGreedyRegister, _min * offset); |
852 } | 877 } |
853 | 878 |
854 // This backtrack doesn't trigger until the quantifier has eaten as much as | 879 // This backtrack doesn't trigger until the quantifier has eaten as much as |
855 // possible. Unfortunately, whenever we backtrack in this simple system, | 880 // possible. Unfortunately, whenever we backtrack in this simple system, |
856 // the old current position in the subject string is also rewound. That's | 881 // the old current position in the subject string is also rewound. That's |
857 // not so convenient here, so we will have to save the current position in | 882 // not so convenient here, so we will have to save the current position in |
858 // a special register. It would be faster to generate the body in a | 883 // a special register. It would be faster to generate the body in a |
859 // special mode where we use a different backtrack instruction that just | 884 // special mode where we use a different backtrack instruction that just |
860 // rewinds the current position a certain number of characters instead of | 885 // rewinds the current position a certain number of characters instead of |
861 // popping it. | 886 // popping it. |
862 compiler.pushBacktrack(cantAdvanceMore); | 887 compiler.pushBacktrack(cantAdvanceMore); |
863 | 888 |
864 // The loop. | 889 // The loop. |
865 compiler.bind(newIteration); | 890 compiler.bind(newIteration); |
866 compiler.copyRegister(_savePositionRegister, CURRENT_POSITION); | 891 compiler.copyRegister(_savePositionRegister, CURRENT_POSITION); |
867 compiler.generate(_body, newIteration); | 892 compiler.generate(_body, newIteration); |
868 | 893 |
869 // The greedy quantifier has eaten as much as it can. Time to try the | 894 // The greedy quantifier has eaten as much as it can. Time to try the |
870 // continuation of the regexp after the quantifier. | 895 // continuation of the regexp after the quantifier. |
871 compiler.bind(cantAdvanceMore); | 896 compiler.bind(cantAdvanceMore); |
872 | 897 |
873 if (_min != 0) { | 898 if (_min != 0) { |
874 compiler.backtrackIfGreater( | 899 if (compiler.stepBackwards) { |
875 _optimizedGreedyRegister, _savePositionRegister); | 900 compiler.backtrackIfGreater( |
| 901 _savePositionRegister, _optimizedGreedyRegister); |
| 902 } else { |
| 903 compiler.backtrackIfGreater( |
| 904 _optimizedGreedyRegister, _savePositionRegister); |
| 905 } |
876 } | 906 } |
877 | 907 |
878 compiler.addToRegister(_savePositionRegister, _bodyLength); | 908 compiler.addToRegister( |
| 909 _savePositionRegister, compiler.direction * _bodyLength); |
879 compiler.copyRegister(CURRENT_POSITION, _savePositionRegister); | 910 compiler.copyRegister(CURRENT_POSITION, _savePositionRegister); |
880 | 911 |
881 // The continuation of the regexp failed. We backtrack the greedy | 912 // The continuation of the regexp failed. We backtrack the greedy |
882 // quantifier by one step and retry. | 913 // quantifier by one step and retry. |
883 compiler.bind(continuationFailed); | 914 compiler.bind(continuationFailed); |
884 compiler.addToRegister(CURRENT_POSITION, -_bodyLength); | 915 compiler.addToRegister( |
| 916 CURRENT_POSITION, compiler.direction * -_bodyLength); |
885 // If we got back to where the quantifier started matching, then jump | 917 // If we got back to where the quantifier started matching, then jump |
886 // to the continuation (we haven't pushed a backtrack, so if that fails, it | 918 // to the continuation (we haven't pushed a backtrack, so if that fails, it |
887 // will backtrack further). | 919 // will backtrack further). |
888 // We don't have gotoIfEqual, so use gotoIfGreaterEqual. | 920 // We don't have gotoIfEqual, so use gotoIfGreaterEqual. |
889 compiler.gotoIfGreaterEqual( | 921 if (compiler.stepBackwards) { |
890 _optimizedGreedyRegister, CURRENT_POSITION, onSuccess); | 922 compiler.gotoIfGreaterEqual( |
| 923 CURRENT_POSITION, _optimizedGreedyRegister, onSuccess); |
| 924 } else { |
| 925 compiler.gotoIfGreaterEqual( |
| 926 _optimizedGreedyRegister, CURRENT_POSITION, onSuccess); |
| 927 } |
891 compiler.pushBacktrack(continuationFailed); | 928 compiler.pushBacktrack(continuationFailed); |
892 compiler.goto(onSuccess); | 929 compiler.goto(onSuccess); |
893 } | 930 } |
894 | 931 |
895 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 932 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
896 // We optimize loops of the form .* to avoid big backtrack stacks. | 933 // We optimize loops of the form .* to avoid big backtrack stacks. |
897 if (_isOptimizedGreedy) { | 934 if (_isOptimizedGreedy) { |
898 generateFixedLengthGreedy(compiler, onSuccess); | 935 generateFixedLengthGreedy(compiler, onSuccess); |
899 return; | 936 return; |
900 } | 937 } |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
995 _subtreeRegistersThatNeedSaving.isNotEmpty; | 1032 _subtreeRegistersThatNeedSaving.isNotEmpty; |
996 } | 1033 } |
997 } | 1034 } |
998 | 1035 |
999 class Atom extends MiniExpAst { | 1036 class Atom extends MiniExpAst { |
1000 final int _constantIndex; | 1037 final int _constantIndex; |
1001 | 1038 |
1002 Atom(this._constantIndex); | 1039 Atom(this._constantIndex); |
1003 | 1040 |
1004 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 1041 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
1005 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); | 1042 if (compiler.stepBackwards) { |
| 1043 compiler.backtrackIfEqual(CURRENT_POSITION, 0); |
| 1044 } else { |
| 1045 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
| 1046 } |
1006 MiniExpLabel match; | 1047 MiniExpLabel match; |
1007 int charCode = compiler.constantPoolEntry(_constantIndex); | 1048 int charCode = compiler.constantPoolEntry(_constantIndex); |
1008 if (!compiler.caseSensitive) { | 1049 if (!compiler.caseSensitive) { |
1009 List<int> equivalents = internalRegExpEquivalenceClass(charCode); | 1050 List<int> equivalents = internalRegExpEquivalenceClass(charCode); |
1010 if (equivalents != null && equivalents.length > 1) { | 1051 if (equivalents != null && equivalents.length > 1) { |
1011 match = new MiniExpLabel(); | 1052 match = new MiniExpLabel(); |
1012 for (int equivalent in equivalents) { | 1053 for (int equivalent in equivalents) { |
1013 if (equivalent == charCode) continue; | 1054 if (equivalent == charCode) continue; |
1014 compiler.gotoIfMatches(equivalent, match); | 1055 compiler.gotoIfMatches(equivalent, match); |
1015 } | 1056 } |
1016 } | 1057 } |
1017 } | 1058 } |
1018 compiler.backtrackIfNoMatch(_constantIndex); | 1059 compiler.backtrackIfNoMatch(_constantIndex); |
1019 if (match != null) compiler.bind(match); | 1060 if (match != null) compiler.bind(match); |
1020 compiler.addToRegister(CURRENT_POSITION, 1); | 1061 compiler.addToRegister(CURRENT_POSITION, compiler.direction); |
1021 compiler.goto(onSuccess); | 1062 compiler.goto(onSuccess); |
1022 } | 1063 } |
1023 | 1064 |
1024 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); | 1065 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); |
1025 } | 1066 } |
1026 | 1067 |
1027 class CharClass extends MiniExpAst { | 1068 class CharClass extends MiniExpAst { |
1028 final List<int> _ranges = new List<int>(); | 1069 final List<int> _ranges = new List<int>(); |
1029 final bool _positive; | 1070 final bool _positive; |
1030 | 1071 |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1112 } | 1153 } |
1113 } | 1154 } |
1114 ranges.add(start); | 1155 ranges.add(start); |
1115 ranges.add(end); | 1156 ranges.add(end); |
1116 } | 1157 } |
1117 // TODO(erikcorry): Sort and merge ranges. | 1158 // TODO(erikcorry): Sort and merge ranges. |
1118 return ranges; | 1159 return ranges; |
1119 } | 1160 } |
1120 | 1161 |
1121 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 1162 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
1122 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); | 1163 if (compiler.stepBackwards) { |
| 1164 compiler.backtrackIfEqual(CURRENT_POSITION, 0); |
| 1165 } else { |
| 1166 compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
| 1167 } |
1123 List<int> ranges = _ranges; | 1168 List<int> ranges = _ranges; |
1124 if (!compiler.caseSensitive) { | 1169 if (!compiler.caseSensitive) { |
1125 ranges = caseInsensitiveRanges(_ranges); | 1170 ranges = caseInsensitiveRanges(_ranges); |
1126 } | 1171 } |
1127 MiniExpLabel match = new MiniExpLabel(); | 1172 MiniExpLabel match = new MiniExpLabel(); |
1128 if (_positive) { | 1173 if (_positive) { |
1129 for (int i = 0; i < ranges.length; i += 2) { | 1174 for (int i = 0; i < ranges.length; i += 2) { |
1130 compiler.gotoIfInRange(ranges[i], ranges[i + 1], match); | 1175 compiler.gotoIfInRange(ranges[i], ranges[i + 1], match); |
1131 } | 1176 } |
1132 compiler.backtrack(); | 1177 compiler.backtrack(); |
1133 compiler.bind(match); | 1178 compiler.bind(match); |
1134 } else { | 1179 } else { |
1135 for (int i = 0; i < ranges.length; i += 2) { | 1180 for (int i = 0; i < ranges.length; i += 2) { |
1136 compiler.backtrackIfInRange(ranges[i], ranges[i + 1]); | 1181 compiler.backtrackIfInRange(ranges[i], ranges[i + 1]); |
1137 } | 1182 } |
1138 } | 1183 } |
1139 compiler.addToRegister(CURRENT_POSITION, 1); | 1184 compiler.addToRegister(CURRENT_POSITION, compiler.direction); |
1140 compiler.goto(onSuccess); | 1185 compiler.goto(onSuccess); |
1141 } | 1186 } |
1142 | 1187 |
1143 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); | 1188 MiniExpAnalysis analyze(compiler) => const MiniExpAnalysis.atom(); |
1144 } | 1189 } |
1145 | 1190 |
1146 // This class is used for all backslashes followed by numbers. For web | 1191 // This class is used for all backslashes followed by numbers. For web |
1147 // compatibility, if the number (interpreted as decimal) is smaller than the | 1192 // compatibility, if the number (interpreted as decimal) is smaller than the |
1148 // number of captures, then it will be interpreted as a back reference. | 1193 // number of captures, then it will be interpreted as a back reference. |
1149 // Otherwise the number will be interpreted as an octal character code escape. | 1194 // Otherwise the number will be interpreted as an octal character code escape. |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1191 if (_startRegister == null) { | 1236 if (_startRegister == null) { |
1192 _startRegister = compiler.allocateCaptureRegisters(); | 1237 _startRegister = compiler.allocateCaptureRegisters(); |
1193 _endRegister = _startRegister - 1; | 1238 _endRegister = _startRegister - 1; |
1194 } | 1239 } |
1195 } | 1240 } |
1196 | 1241 |
1197 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { | 1242 void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
1198 MiniExpLabel undoStart = new MiniExpLabel(); | 1243 MiniExpLabel undoStart = new MiniExpLabel(); |
1199 MiniExpLabel writeEnd = new MiniExpLabel(); | 1244 MiniExpLabel writeEnd = new MiniExpLabel(); |
1200 MiniExpLabel undoEnd = new MiniExpLabel(); | 1245 MiniExpLabel undoEnd = new MiniExpLabel(); |
1201 compiler.copyRegister(_startRegister, CURRENT_POSITION); | 1246 int firstRegister = compiler.stepBackwards ? _endRegister : _startRegister; |
| 1247 int lastRegister = compiler.stepBackwards ? _startRegister : _endRegister; |
| 1248 compiler.copyRegister(firstRegister, CURRENT_POSITION); |
1202 compiler.pushBacktrack(undoStart); | 1249 compiler.pushBacktrack(undoStart); |
1203 | 1250 |
1204 compiler.generate(_body, writeEnd); | 1251 compiler.generate(_body, writeEnd); |
1205 | 1252 |
1206 compiler.bind(writeEnd); | 1253 compiler.bind(writeEnd); |
1207 compiler.copyRegister(_endRegister, CURRENT_POSITION); | 1254 compiler.copyRegister(lastRegister, CURRENT_POSITION); |
1208 compiler.pushBacktrack(undoEnd); | 1255 compiler.pushBacktrack(undoEnd); |
1209 compiler.goto(onSuccess); | 1256 compiler.goto(onSuccess); |
1210 | 1257 |
1211 compiler.bind(undoStart); | 1258 compiler.bind(undoStart); |
1212 compiler.copyRegister(_startRegister, NO_POSITION_REGISTER); | 1259 compiler.copyRegister(firstRegister, NO_POSITION_REGISTER); |
1213 compiler.backtrack(); | 1260 compiler.backtrack(); |
1214 | 1261 |
1215 compiler.bind(undoEnd); | 1262 compiler.bind(undoEnd); |
1216 compiler.copyRegister(_endRegister, NO_POSITION_REGISTER); | 1263 compiler.copyRegister(lastRegister, NO_POSITION_REGISTER); |
1217 compiler.backtrack(); | 1264 compiler.backtrack(); |
1218 } | 1265 } |
1219 | 1266 |
1220 MiniExpAnalysis analyze(MiniExpCompiler compiler) { | 1267 MiniExpAnalysis analyze(MiniExpCompiler compiler) { |
1221 allocateRegisters(compiler); | 1268 allocateRegisters(compiler); |
1222 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); | 1269 MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); |
1223 return new MiniExpAnalysis.capture( | 1270 return new MiniExpAnalysis.capture( |
1224 bodyAnalysis, _startRegister, _endRegister); | 1271 bodyAnalysis, _startRegister, _endRegister); |
1225 } | 1272 } |
1226 } | 1273 } |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1416 notWordBoundary, | 1463 notWordBoundary, |
1417 wordCharacter, | 1464 wordCharacter, |
1418 notWordCharacter, | 1465 notWordCharacter, |
1419 digit, | 1466 digit, |
1420 notDigit, | 1467 notDigit, |
1421 whitespace, | 1468 whitespace, |
1422 notWhitespace, | 1469 notWhitespace, |
1423 nonCapturing, | 1470 nonCapturing, |
1424 lookAhead, | 1471 lookAhead, |
1425 negativeLookAhead, | 1472 negativeLookAhead, |
| 1473 lookBehind, |
| 1474 negativeLookBehind, |
1426 other | 1475 other |
1427 } | 1476 } |
1428 | 1477 |
1429 class MiniExpParser { | 1478 class MiniExpParser { |
1430 // The constant pool is used to look up character data when the regexp is | 1479 // The constant pool is used to look up character data when the regexp is |
1431 // running. It consists of the regexp source with some characters appended | 1480 // running. It consists of the regexp source with some characters appended |
1432 // to handle escapes that are not literally present in the regexp input. | 1481 // to handle escapes that are not literally present in the regexp input. |
1433 final MiniExpCompiler _compiler; | 1482 final MiniExpCompiler _compiler; |
1434 final String _source; | 1483 final String _source; |
1435 final bool _isMultiLine; | 1484 final bool _isMultiLine; |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1493 | 1542 |
1494 MiniExpAst tryParseAssertion() { | 1543 MiniExpAst tryParseAssertion() { |
1495 if (acceptToken(Token.hat)) { | 1544 if (acceptToken(Token.hat)) { |
1496 return _isMultiLine ? new AtBeginningOfLine() : new AtStart(); | 1545 return _isMultiLine ? new AtBeginningOfLine() : new AtStart(); |
1497 } | 1546 } |
1498 if (acceptToken(Token.dollar)) { | 1547 if (acceptToken(Token.dollar)) { |
1499 return _isMultiLine ? new AtEndOfLine() : new AtEnd(); | 1548 return _isMultiLine ? new AtEndOfLine() : new AtEnd(); |
1500 } | 1549 } |
1501 if (acceptToken(Token.wordBoundary)) return new WordBoundary(true); | 1550 if (acceptToken(Token.wordBoundary)) return new WordBoundary(true); |
1502 if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false); | 1551 if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false); |
1503 var lookaheadAst; | 1552 var lookAst; |
1504 if (acceptToken(Token.lookAhead)) { | 1553 if (acceptToken(Token.lookAhead)) { |
1505 lookaheadAst = new LookAhead(true, parseDisjunction(), _compiler); | 1554 lookAst = new LookAhead(true, false, parseDisjunction(), _compiler); |
1506 } else if (acceptToken(Token.negativeLookAhead)) { | 1555 } else if (acceptToken(Token.negativeLookAhead)) { |
1507 lookaheadAst = new LookAhead(false, parseDisjunction(), _compiler); | 1556 lookAst = new LookAhead(false, false, parseDisjunction(), _compiler); |
| 1557 } else if (acceptToken(Token.lookBehind)) { |
| 1558 lookAst = new LookAhead(true, true, parseDisjunction(), _compiler); |
| 1559 } else if (acceptToken(Token.negativeLookBehind)) { |
| 1560 lookAst = new LookAhead(false, true, parseDisjunction(), _compiler); |
1508 } | 1561 } |
1509 if (lookaheadAst != null) { | 1562 if (lookAst != null) { |
1510 expectToken(Token.rParen); | 1563 expectToken(Token.rParen); |
1511 // The normal syntax does not allow a quantifier here, but the web | 1564 // The normal syntax does not allow a quantifier here, but the web |
1512 // compatible one does. Slightly nasty hack for compatibility: | 1565 // compatible one does. Slightly nasty hack for compatibility: |
1513 if (peekToken(Token.quant)) { | 1566 if (peekToken(Token.quant)) { |
1514 MiniExpAst quant = new Quantifier( | 1567 MiniExpAst quant = new Quantifier( |
1515 _minimumRepeats, _maximumRepeats, _lastWasGreedy, | 1568 _minimumRepeats, _maximumRepeats, _lastWasGreedy, |
1516 lookaheadAst, _compiler); | 1569 lookAst, _compiler); |
1517 expectToken(Token.quant); | 1570 expectToken(Token.quant); |
1518 return quant; | 1571 return quant; |
1519 } | 1572 } |
1520 return lookaheadAst; | 1573 return lookAst; |
1521 } | 1574 } |
1522 return null; | 1575 return null; |
1523 } | 1576 } |
1524 | 1577 |
1525 MiniExpAst parseTerm() { | 1578 MiniExpAst parseTerm() { |
1526 MiniExpAst ast = tryParseAssertion(); | 1579 MiniExpAst ast = tryParseAssertion(); |
1527 if (ast == null) { | 1580 if (ast == null) { |
1528 ast = parseAtom(); | 1581 ast = parseAtom(); |
1529 if (peekToken(Token.quant)) { | 1582 if (peekToken(Token.quant)) { |
1530 MiniExpAst quant = new Quantifier( | 1583 MiniExpAst quant = new Quantifier( |
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1929 if (!_has(_position + 1)) error("unterminated group"); | 1982 if (!_has(_position + 1)) error("unterminated group"); |
1930 if (_at(_position + 1) == CHAR_CODE_QUERY) { | 1983 if (_at(_position + 1) == CHAR_CODE_QUERY) { |
1931 if (!_has(_position + 2)) error("unterminated group"); | 1984 if (!_has(_position + 2)) error("unterminated group"); |
1932 int parenthesisModifier = _at(_position + 2); | 1985 int parenthesisModifier = _at(_position + 2); |
1933 if (parenthesisModifier == CHAR_CODE_EQUALS) { | 1986 if (parenthesisModifier == CHAR_CODE_EQUALS) { |
1934 _lastToken = Token.lookAhead; | 1987 _lastToken = Token.lookAhead; |
1935 } else if (parenthesisModifier == CHAR_CODE_COLON) { | 1988 } else if (parenthesisModifier == CHAR_CODE_COLON) { |
1936 _lastToken = Token.nonCapturing; | 1989 _lastToken = Token.nonCapturing; |
1937 } else if (parenthesisModifier == CHAR_CODE_BANG) { | 1990 } else if (parenthesisModifier == CHAR_CODE_BANG) { |
1938 _lastToken = Token.negativeLookAhead; | 1991 _lastToken = Token.negativeLookAhead; |
| 1992 } else if (parenthesisModifier == CHAR_CODE_LESS_THAN) { |
| 1993 if (!_has(_position + 3)) error("unterminated group"); |
| 1994 int lookBehindModifier = _at(_position + 3); |
| 1995 _position++; |
| 1996 if (lookBehindModifier == CHAR_CODE_EQUALS) { |
| 1997 _lastToken = Token.lookBehind; |
| 1998 } else if (lookBehindModifier == CHAR_CODE_BANG) { |
| 1999 _lastToken = Token.negativeLookBehind; |
| 2000 } else { |
| 2001 error("invalid group"); |
| 2002 } |
1939 } else { | 2003 } else { |
1940 error("invalid group"); | 2004 error("invalid group"); |
1941 } | 2005 } |
1942 _position += 2; | 2006 _position += 2; |
1943 return; | 2007 return; |
1944 } | 2008 } |
1945 } | 2009 } |
1946 | 2010 |
1947 void lexQuantifier() { | 2011 void lexQuantifier() { |
1948 int quantifierCode = _at(_position); | 2012 int quantifierCode = _at(_position); |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2107 break; | 2171 break; |
2108 case BACKTRACK_GT: | 2172 case BACKTRACK_GT: |
2109 int reg1 = _registers[_byteCodes[programCounter++]]; | 2173 int reg1 = _registers[_byteCodes[programCounter++]]; |
2110 int reg2 = _registers[_byteCodes[programCounter++]]; | 2174 int reg2 = _registers[_byteCodes[programCounter++]]; |
2111 if (reg1 > reg2) { | 2175 if (reg1 > reg2) { |
2112 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2176 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
2113 programCounter = stack[--stackPointer]; | 2177 programCounter = stack[--stackPointer]; |
2114 } | 2178 } |
2115 break; | 2179 break; |
2116 case BACKTRACK_IF_NO_MATCH: | 2180 case BACKTRACK_IF_NO_MATCH: |
2117 if (subject.codeUnitAt(_registers[CURRENT_POSITION]) != | 2181 case BACKTRACK_IF_PREVIOUS_NO_MATCH: |
| 2182 int offset = byteCode - BACKTRACK_IF_NO_MATCH; |
| 2183 if (subject.codeUnitAt(_registers[CURRENT_POSITION] - offset) != |
2118 _constantPool.codeUnitAt(_byteCodes[programCounter++])) { | 2184 _constantPool.codeUnitAt(_byteCodes[programCounter++])) { |
2119 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2185 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
2120 programCounter = stack[--stackPointer]; | 2186 programCounter = stack[--stackPointer]; |
2121 } | 2187 } |
2122 break; | 2188 break; |
2123 case BACKTRACK_IF_IN_RANGE: | 2189 case BACKTRACK_IF_IN_RANGE: |
2124 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); | 2190 case BACKTRACK_IF_PREVIOUS_IN_RANGE: |
| 2191 int offset = byteCode - BACKTRACK_IF_IN_RANGE; |
| 2192 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
2125 int from = _byteCodes[programCounter++]; | 2193 int from = _byteCodes[programCounter++]; |
2126 int to = _byteCodes[programCounter++]; | 2194 int to = _byteCodes[programCounter++]; |
2127 if (from <= code && code <= to) { | 2195 if (from <= code && code <= to) { |
2128 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2196 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
2129 programCounter = stack[--stackPointer]; | 2197 programCounter = stack[--stackPointer]; |
2130 } | 2198 } |
2131 break; | 2199 break; |
2132 case GOTO_IF_MATCH: | 2200 case GOTO_IF_MATCH: |
2133 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); | 2201 case GOTO_IF_PREVIOUS_MATCH: |
| 2202 int offset = byteCode - GOTO_IF_MATCH; |
| 2203 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
2134 int expected = _byteCodes[programCounter++]; | 2204 int expected = _byteCodes[programCounter++]; |
2135 int dest = _byteCodes[programCounter++]; | 2205 int dest = _byteCodes[programCounter++]; |
2136 if (code == expected) programCounter = dest; | 2206 if (code == expected) programCounter = dest; |
2137 break; | 2207 break; |
2138 case GOTO_IF_IN_RANGE: | 2208 case GOTO_IF_IN_RANGE: |
2139 int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); | 2209 case GOTO_IF_PREVIOUS_IN_RANGE: |
| 2210 int offset = byteCode - GOTO_IF_IN_RANGE; |
| 2211 int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
2140 int from = _byteCodes[programCounter++]; | 2212 int from = _byteCodes[programCounter++]; |
2141 int to = _byteCodes[programCounter++]; | 2213 int to = _byteCodes[programCounter++]; |
2142 int dest = _byteCodes[programCounter++]; | 2214 int dest = _byteCodes[programCounter++]; |
2143 if (from <= code && code <= to) programCounter = dest; | 2215 if (from <= code && code <= to) programCounter = dest; |
2144 break; | 2216 break; |
2145 case GOTO_EQ: | 2217 case GOTO_EQ: |
2146 int reg1 = _registers[_byteCodes[programCounter++]]; | 2218 int reg1 = _registers[_byteCodes[programCounter++]]; |
2147 int reg2 = _registers[_byteCodes[programCounter++]]; | 2219 int reg2 = _registers[_byteCodes[programCounter++]]; |
2148 int dest = _byteCodes[programCounter++]; | 2220 int dest = _byteCodes[programCounter++]; |
2149 if (reg1 == reg2) programCounter = dest; | 2221 if (reg1 == reg2) programCounter = dest; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2181 case COPY_REGISTER: | 2253 case COPY_REGISTER: |
2182 // We don't normally keep the stack pointer in sync with its slot in | 2254 // We don't normally keep the stack pointer in sync with its slot in |
2183 // the _registers, but we have to have it in sync here. | 2255 // the _registers, but we have to have it in sync here. |
2184 _registers[STACK_POINTER] = stackPointer; | 2256 _registers[STACK_POINTER] = stackPointer; |
2185 int registerIndex = _byteCodes[programCounter++]; | 2257 int registerIndex = _byteCodes[programCounter++]; |
2186 int value = _registers[_byteCodes[programCounter++]]; | 2258 int value = _registers[_byteCodes[programCounter++]]; |
2187 _registers[registerIndex] = value; | 2259 _registers[registerIndex] = value; |
2188 stackPointer = _registers[STACK_POINTER]; | 2260 stackPointer = _registers[STACK_POINTER]; |
2189 break; | 2261 break; |
2190 case BACKTRACK_ON_BACK_REFERENCE: | 2262 case BACKTRACK_ON_BACK_REFERENCE: |
| 2263 case BACKTRACK_ON_BACK_REFERENCE_BACKWARDS: |
2191 int registerIndex = _byteCodes[programCounter++]; | 2264 int registerIndex = _byteCodes[programCounter++]; |
2192 bool case_sensitive = _byteCodes[programCounter++] != 0; | 2265 bool case_sensitive = _byteCodes[programCounter++] != 0; |
2193 if (!checkBackReference(subject, case_sensitive, registerIndex)) { | 2266 if (!checkBackReference( |
| 2267 subject, case_sensitive, registerIndex, |
| 2268 byteCode == BACKTRACK_ON_BACK_REFERENCE_BACKWARDS)) { |
2194 // Backtrack. | 2269 // Backtrack. |
2195 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2270 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
2196 programCounter = stack[--stackPointer]; | 2271 programCounter = stack[--stackPointer]; |
2197 } | 2272 } |
2198 break; | 2273 break; |
2199 case BACKTRACK: | 2274 case BACKTRACK: |
2200 _registers[CURRENT_POSITION] = stack[--stackPointer]; | 2275 _registers[CURRENT_POSITION] = stack[--stackPointer]; |
2201 programCounter = stack[--stackPointer]; | 2276 programCounter = stack[--stackPointer]; |
2202 break; | 2277 break; |
2203 case SUCCEED: | 2278 case SUCCEED: |
2204 return true; | 2279 return true; |
2205 case FAIL: | 2280 case FAIL: |
2206 return false; | 2281 return false; |
2207 default: | 2282 default: |
2208 assert(false); | 2283 assert(false); |
2209 break; | 2284 break; |
2210 } | 2285 } |
2211 } | 2286 } |
2212 } | 2287 } |
2213 | 2288 |
2214 bool checkBackReference( | 2289 bool checkBackReference( |
2215 String subject, bool caseSensitive, int registerIndex) { | 2290 String subject, bool caseSensitive, int registerIndex, bool backwards) { |
2216 int start = _registers[registerIndex]; | 2291 int start = _registers[registerIndex]; |
2217 int end = _registers[registerIndex + 1]; | 2292 int end = _registers[registerIndex + 1]; |
2218 if (end == NO_POSITION) return true; | 2293 if (end == NO_POSITION) return true; |
2219 int length = end - start; | 2294 int length = end - start; |
2220 int currentPosition = _registers[CURRENT_POSITION]; | 2295 int currentPosition = _registers[CURRENT_POSITION]; |
2221 if (currentPosition + end - start > subject.length) return false; | 2296 if (backwards) { |
| 2297 if (currentPosition < length) return false; |
| 2298 currentPosition -= length; |
| 2299 } else { |
| 2300 if (currentPosition + length > subject.length) return false; |
| 2301 } |
2222 for (int i = 0; i < length; i++) { | 2302 for (int i = 0; i < length; i++) { |
2223 int x = subject.codeUnitAt(start + i); | 2303 int x = subject.codeUnitAt(start + i); |
2224 int y = subject.codeUnitAt(currentPosition + i); | 2304 int y = subject.codeUnitAt(currentPosition + i); |
2225 if (!caseSensitive) { | 2305 if (!caseSensitive) { |
2226 x = internalRegExpCanonicalize(x); | 2306 x = internalRegExpCanonicalize(x); |
2227 y = internalRegExpCanonicalize(y); | 2307 y = internalRegExpCanonicalize(y); |
2228 } | 2308 } |
2229 if (x != y) return false; | 2309 if (x != y) return false; |
2230 } | 2310 } |
2231 _registers[CURRENT_POSITION] += length; | 2311 _registers[CURRENT_POSITION] += backwards ? -length : length; |
2232 return true; | 2312 return true; |
2233 } | 2313 } |
2234 } | 2314 } |
OLD | NEW |