| Index: lib/core/regexp.dart
|
| diff --git a/lib/core/regexp.dart b/lib/core/regexp.dart
|
| index 0f4b29404c1c41a0a8d3a164aa0681bb9de309bf..11d10cedc7165524e347f1c838ad644b15dcad0c 100644
|
| --- a/lib/core/regexp.dart
|
| +++ b/lib/core/regexp.dart
|
| @@ -97,18 +97,23 @@ const BACKTRACK_EQ = 4; // reg reg
|
| const BACKTRACK_NE = 5; // reg reg
|
| const BACKTRACK_GT = 6; // reg reg
|
| const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset
|
| -const BACKTRACK_IF_IN_RANGE = 8; // from to
|
| -const GOTO_IF_MATCH = 9; // charCode label
|
| -const GOTO_IF_IN_RANGE = 10; // from to label
|
| -const GOTO_EQ = 11; // reg reg label
|
| -const GOTO_GE = 12; // reg reg label
|
| -const GOTO_IF_WORD_CHARACTER = 13; // position-offset label
|
| -const ADD_TO_REGISTER = 14; // reg const
|
| -const COPY_REGISTER = 15; // dest-reg source-reg
|
| -const BACKTRACK_ON_BACK_REFERENCE = 16; // capture-reg
|
| -const BACKTRACK = 17;
|
| -const SUCCEED = 18;
|
| -const FAIL = 19;
|
| +const BACKTRACK_IF_PREVIOUS_NO_MATCH = 8; // constant-pool-offset
|
| +const BACKTRACK_IF_IN_RANGE = 9; // from to
|
| +const BACKTRACK_IF_PREVIOUS_IN_RANGE = 10; // from to
|
| +const GOTO_IF_MATCH = 11; // charCode label
|
| +const GOTO_IF_PREVIOUS_MATCH = 12; // charCode label
|
| +const GOTO_IF_IN_RANGE = 13; // from to label
|
| +const GOTO_IF_PREVIOUS_IN_RANGE = 14; // from to label
|
| +const GOTO_EQ = 15; // reg reg label
|
| +const GOTO_GE = 16; // reg reg label
|
| +const GOTO_IF_WORD_CHARACTER = 17; // position-offset label
|
| +const ADD_TO_REGISTER = 18; // reg const
|
| +const COPY_REGISTER = 19; // dest-reg source-reg
|
| +const BACKTRACK_ON_BACK_REFERENCE = 20; // capture-reg
|
| +const BACKTRACK_ON_BACK_REFERENCE_BACKWARDS = 21; // capture-reg
|
| +const BACKTRACK = 22;
|
| +const SUCCEED = 23;
|
| +const FAIL = 24;
|
|
|
| // Format is name, number of register arguments, number of other arguments.
|
| const BYTE_CODE_NAMES = const [
|
| @@ -120,15 +125,20 @@ const BYTE_CODE_NAMES = const [
|
| "BACKTRACK_NE", 2, 0,
|
| "BACKTRACK_GT", 2, 0,
|
| "BACKTRACK_IF_NO_MATCH", 0, 1,
|
| + "BACKTRACK_IF_PREVIOUS_NO_MATCH", 0, 1,
|
| "BACKTRACK_IF_IN_RANGE", 0, 2,
|
| + "BACKTRACK_IF_PREVIOUS_IN_RANGE", 0, 2,
|
| "GOTO_IF_MATCH", 0, 2,
|
| + "GOTO_IF_PREVIOUS_MATCH", 0, 2,
|
| "GOTO_IF_IN_RANGE", 0, 3,
|
| + "GOTO_IF_PREVIOUS_IN_RANGE", 0, 3,
|
| "GOTO_EQ", 2, 1,
|
| "GOTO_GE", 2, 1,
|
| "GOTO_IF_WORD_CHARACTER", 0, 2,
|
| "ADD_TO_REGISTER", 1, 1,
|
| "COPY_REGISTER", 2, 0,
|
| "BACKTRACK_ON_BACK_REFERENCE", 1, 1,
|
| + "BACKTRACK_ON_BACK_REFERENCE_BACKWARDS", 1, 1,
|
| "BACKTRACK", 0, 0,
|
| "SUCCEED", 0, 0,
|
| "FAIL", 0, 0];
|
| @@ -150,6 +160,7 @@ const CHAR_CODE_0 = 48;
|
| const CHAR_CODE_7 = 55;
|
| const CHAR_CODE_9 = 57;
|
| const CHAR_CODE_COLON = 58;
|
| +const CHAR_CODE_LESS_THAN = 60;
|
| const CHAR_CODE_EQUALS = 61;
|
| const CHAR_CODE_QUERY = 63;
|
| const CHAR_CODE_UPPER_A = 65;
|
| @@ -200,6 +211,9 @@ class MiniExpCompiler {
|
| final List<int> _extraConstants = new List<int>();
|
| final List<BackReference> _backReferences = new List<BackReference>();
|
| MiniExpLabel _pendingGoto;
|
| + bool stepBackwards = false;
|
| + int get oneIfBackwards => stepBackwards ? 1 : 0;
|
| + int get direction => stepBackwards ? -1 : 1;
|
|
|
| MiniExpCompiler(this.pattern, this.caseSensitive) {
|
| for (int i = 0; i < FIXED_REGISTERS; i++) {
|
| @@ -209,6 +223,7 @@ class MiniExpCompiler {
|
|
|
| List<int> get codes {
|
| flushPendingGoto();
|
| + //disassemble(_codes);
|
| return _codes;
|
| }
|
|
|
| @@ -379,7 +394,7 @@ class MiniExpCompiler {
|
| }
|
|
|
| void backtrackOnBackReferenceFail(int register, bool caseSensitive) {
|
| - _emit(BACKTRACK_ON_BACK_REFERENCE,
|
| + _emit(BACKTRACK_ON_BACK_REFERENCE + oneIfBackwards,
|
| registerNumber(register), caseSensitive ? 1 : 0);
|
| }
|
|
|
| @@ -393,15 +408,15 @@ class MiniExpCompiler {
|
| }
|
|
|
| void backtrackIfNoMatch(int constant_pool_offset) {
|
| - _emit(BACKTRACK_IF_NO_MATCH, constant_pool_offset);
|
| + _emit(BACKTRACK_IF_NO_MATCH + oneIfBackwards, constant_pool_offset);
|
| }
|
|
|
| void backtrackIfInRange(int from, int to) {
|
| - _emit(BACKTRACK_IF_IN_RANGE, from, to);
|
| + _emit(BACKTRACK_IF_IN_RANGE + oneIfBackwards, from, to);
|
| }
|
|
|
| void gotoIfMatches(int charCode, MiniExpLabel label) {
|
| - _emit(GOTO_IF_MATCH, charCode);
|
| + _emit(GOTO_IF_MATCH + oneIfBackwards, charCode);
|
| link(label);
|
| }
|
|
|
| @@ -409,7 +424,7 @@ class MiniExpCompiler {
|
| if (from == to) {
|
| gotoIfMatches(from, label);
|
| } else {
|
| - _emit(GOTO_IF_IN_RANGE, from, to);
|
| + _emit(GOTO_IF_IN_RANGE + oneIfBackwards, from, to);
|
| link(label);
|
| }
|
| }
|
| @@ -510,10 +525,11 @@ class MiniExpAnalysis {
|
| anchored = true,
|
| registersToSave = null;
|
|
|
| - MiniExpAnalysis.lookahead(MiniExpAnalysis bodyAnalysis, bool positive)
|
| + MiniExpAnalysis.lookahead(
|
| + MiniExpAnalysis bodyAnalysis, bool positive, bool behind)
|
| : canMatchEmpty = true,
|
| fixedLength = 0,
|
| - anchored = positive && bodyAnalysis.anchored,
|
| + anchored = positive && !behind && bodyAnalysis.anchored,
|
| registersToSave = bodyAnalysis.registersToSave;
|
|
|
| MiniExpAnalysis.quantifier(
|
| @@ -601,8 +617,11 @@ class Alternative extends MiniExpAst {
|
| Alternative(this._left, this._right);
|
|
|
| void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
|
| - compiler.generate(_left, _right.label);
|
| - compiler.generate(_right, onSuccess);
|
| + MiniExpAst first = compiler.stepBackwards ? _right : _left;
|
| + MiniExpAst second = compiler.stepBackwards ? _left : _right;
|
| +
|
| + compiler.generate(first, second.label);
|
| + compiler.generate(second, onSuccess);
|
| }
|
|
|
| MiniExpAnalysis analyze(MiniExpCompiler compiler) {
|
| @@ -690,13 +709,15 @@ class WordBoundary extends Assertion {
|
|
|
| class LookAhead extends Assertion {
|
| final bool _positive;
|
| + final bool _behind;
|
| final MiniExpAst _body;
|
| List<int> _subtreeRegisters;
|
|
|
| int _savedStackPointerRegister;
|
| int _savedPosition;
|
|
|
| - LookAhead(this._positive, this._body, MiniExpCompiler compiler) {
|
| + LookAhead(
|
| + this._positive, this._behind, this._body, MiniExpCompiler compiler) {
|
| _savedStackPointerRegister = compiler.allocateWorkingRegister();
|
| _savedPosition = compiler.allocateWorkingRegister();
|
| }
|
| @@ -714,7 +735,10 @@ class LookAhead extends Assertion {
|
| if (!_positive) {
|
| compiler.pushBacktrack(succeed_on_failure);
|
| }
|
| + bool stepBackwards = compiler.stepBackwards;
|
| + compiler.stepBackwards = _behind;
|
| compiler.generate(_body, body_succeeded);
|
| + compiler.stepBackwards = stepBackwards;
|
|
|
| compiler.bind(body_succeeded);
|
| compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister);
|
| @@ -755,7 +779,7 @@ class LookAhead extends Assertion {
|
| MiniExpAnalysis analyze(compiler) {
|
| MiniExpAnalysis bodyAnalysis = _body.analyze(compiler);
|
| _subtreeRegisters = bodyAnalysis.registersToSave;
|
| - return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive);
|
| + return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive, _behind);
|
| }
|
| }
|
|
|
| @@ -848,7 +872,8 @@ class Quantifier extends MiniExpAst {
|
| // unwound enough.
|
| compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION);
|
| if (_min != 0) {
|
| - compiler.addToRegister(_optimizedGreedyRegister, _min * _bodyLength);
|
| + int offset = compiler.direction * _bodyLength;
|
| + compiler.addToRegister(_optimizedGreedyRegister, _min * offset);
|
| }
|
|
|
| // This backtrack doesn't trigger until the quantifier has eaten as much as
|
| @@ -871,23 +896,35 @@ class Quantifier extends MiniExpAst {
|
| compiler.bind(cantAdvanceMore);
|
|
|
| if (_min != 0) {
|
| - compiler.backtrackIfGreater(
|
| - _optimizedGreedyRegister, _savePositionRegister);
|
| + if (compiler.stepBackwards) {
|
| + compiler.backtrackIfGreater(
|
| + _savePositionRegister, _optimizedGreedyRegister);
|
| + } else {
|
| + compiler.backtrackIfGreater(
|
| + _optimizedGreedyRegister, _savePositionRegister);
|
| + }
|
| }
|
|
|
| - compiler.addToRegister(_savePositionRegister, _bodyLength);
|
| + compiler.addToRegister(
|
| + _savePositionRegister, compiler.direction * _bodyLength);
|
| compiler.copyRegister(CURRENT_POSITION, _savePositionRegister);
|
|
|
| // The continuation of the regexp failed. We backtrack the greedy
|
| // quantifier by one step and retry.
|
| compiler.bind(continuationFailed);
|
| - compiler.addToRegister(CURRENT_POSITION, -_bodyLength);
|
| + compiler.addToRegister(
|
| + CURRENT_POSITION, compiler.direction * -_bodyLength);
|
| // If we got back to where the quantifier started matching, then jump
|
| // to the continuation (we haven't pushed a backtrack, so if that fails, it
|
| // will backtrack further).
|
| // We don't have gotoIfEqual, so use gotoIfGreaterEqual.
|
| - compiler.gotoIfGreaterEqual(
|
| - _optimizedGreedyRegister, CURRENT_POSITION, onSuccess);
|
| + if (compiler.stepBackwards) {
|
| + compiler.gotoIfGreaterEqual(
|
| + CURRENT_POSITION, _optimizedGreedyRegister, onSuccess);
|
| + } else {
|
| + compiler.gotoIfGreaterEqual(
|
| + _optimizedGreedyRegister, CURRENT_POSITION, onSuccess);
|
| + }
|
| compiler.pushBacktrack(continuationFailed);
|
| compiler.goto(onSuccess);
|
| }
|
| @@ -1002,7 +1039,11 @@ class Atom extends MiniExpAst {
|
| Atom(this._constantIndex);
|
|
|
| void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
|
| - compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
|
| + if (compiler.stepBackwards) {
|
| + compiler.backtrackIfEqual(CURRENT_POSITION, 0);
|
| + } else {
|
| + compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
|
| + }
|
| MiniExpLabel match;
|
| int charCode = compiler.constantPoolEntry(_constantIndex);
|
| if (!compiler.caseSensitive) {
|
| @@ -1017,7 +1058,7 @@ class Atom extends MiniExpAst {
|
| }
|
| compiler.backtrackIfNoMatch(_constantIndex);
|
| if (match != null) compiler.bind(match);
|
| - compiler.addToRegister(CURRENT_POSITION, 1);
|
| + compiler.addToRegister(CURRENT_POSITION, compiler.direction);
|
| compiler.goto(onSuccess);
|
| }
|
|
|
| @@ -1119,7 +1160,11 @@ class CharClass extends MiniExpAst {
|
| }
|
|
|
| void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
|
| - compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
|
| + if (compiler.stepBackwards) {
|
| + compiler.backtrackIfEqual(CURRENT_POSITION, 0);
|
| + } else {
|
| + compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
|
| + }
|
| List<int> ranges = _ranges;
|
| if (!compiler.caseSensitive) {
|
| ranges = caseInsensitiveRanges(_ranges);
|
| @@ -1136,7 +1181,7 @@ class CharClass extends MiniExpAst {
|
| compiler.backtrackIfInRange(ranges[i], ranges[i + 1]);
|
| }
|
| }
|
| - compiler.addToRegister(CURRENT_POSITION, 1);
|
| + compiler.addToRegister(CURRENT_POSITION, compiler.direction);
|
| compiler.goto(onSuccess);
|
| }
|
|
|
| @@ -1198,22 +1243,24 @@ class Capture extends MiniExpAst {
|
| MiniExpLabel undoStart = new MiniExpLabel();
|
| MiniExpLabel writeEnd = new MiniExpLabel();
|
| MiniExpLabel undoEnd = new MiniExpLabel();
|
| - compiler.copyRegister(_startRegister, CURRENT_POSITION);
|
| + int firstRegister = compiler.stepBackwards ? _endRegister : _startRegister;
|
| + int lastRegister = compiler.stepBackwards ? _startRegister : _endRegister;
|
| + compiler.copyRegister(firstRegister, CURRENT_POSITION);
|
| compiler.pushBacktrack(undoStart);
|
|
|
| compiler.generate(_body, writeEnd);
|
|
|
| compiler.bind(writeEnd);
|
| - compiler.copyRegister(_endRegister, CURRENT_POSITION);
|
| + compiler.copyRegister(lastRegister, CURRENT_POSITION);
|
| compiler.pushBacktrack(undoEnd);
|
| compiler.goto(onSuccess);
|
|
|
| compiler.bind(undoStart);
|
| - compiler.copyRegister(_startRegister, NO_POSITION_REGISTER);
|
| + compiler.copyRegister(firstRegister, NO_POSITION_REGISTER);
|
| compiler.backtrack();
|
|
|
| compiler.bind(undoEnd);
|
| - compiler.copyRegister(_endRegister, NO_POSITION_REGISTER);
|
| + compiler.copyRegister(lastRegister, NO_POSITION_REGISTER);
|
| compiler.backtrack();
|
| }
|
|
|
| @@ -1423,6 +1470,8 @@ enum Token {
|
| nonCapturing,
|
| lookAhead,
|
| negativeLookAhead,
|
| + lookBehind,
|
| + negativeLookBehind,
|
| other
|
| }
|
|
|
| @@ -1500,24 +1549,28 @@ class MiniExpParser {
|
| }
|
| if (acceptToken(Token.wordBoundary)) return new WordBoundary(true);
|
| if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false);
|
| - var lookaheadAst;
|
| + var lookAst;
|
| if (acceptToken(Token.lookAhead)) {
|
| - lookaheadAst = new LookAhead(true, parseDisjunction(), _compiler);
|
| + lookAst = new LookAhead(true, false, parseDisjunction(), _compiler);
|
| } else if (acceptToken(Token.negativeLookAhead)) {
|
| - lookaheadAst = new LookAhead(false, parseDisjunction(), _compiler);
|
| + lookAst = new LookAhead(false, false, parseDisjunction(), _compiler);
|
| + } else if (acceptToken(Token.lookBehind)) {
|
| + lookAst = new LookAhead(true, true, parseDisjunction(), _compiler);
|
| + } else if (acceptToken(Token.negativeLookBehind)) {
|
| + lookAst = new LookAhead(false, true, parseDisjunction(), _compiler);
|
| }
|
| - if (lookaheadAst != null) {
|
| + if (lookAst != null) {
|
| expectToken(Token.rParen);
|
| // The normal syntax does not allow a quantifier here, but the web
|
| // compatible one does. Slightly nasty hack for compatibility:
|
| if (peekToken(Token.quant)) {
|
| MiniExpAst quant = new Quantifier(
|
| _minimumRepeats, _maximumRepeats, _lastWasGreedy,
|
| - lookaheadAst, _compiler);
|
| + lookAst, _compiler);
|
| expectToken(Token.quant);
|
| return quant;
|
| }
|
| - return lookaheadAst;
|
| + return lookAst;
|
| }
|
| return null;
|
| }
|
| @@ -1936,6 +1989,17 @@ class MiniExpParser {
|
| _lastToken = Token.nonCapturing;
|
| } else if (parenthesisModifier == CHAR_CODE_BANG) {
|
| _lastToken = Token.negativeLookAhead;
|
| + } else if (parenthesisModifier == CHAR_CODE_LESS_THAN) {
|
| + if (!_has(_position + 3)) error("unterminated group");
|
| + int lookBehindModifier = _at(_position + 3);
|
| + _position++;
|
| + if (lookBehindModifier == CHAR_CODE_EQUALS) {
|
| + _lastToken = Token.lookBehind;
|
| + } else if (lookBehindModifier == CHAR_CODE_BANG) {
|
| + _lastToken = Token.negativeLookBehind;
|
| + } else {
|
| + error("invalid group");
|
| + }
|
| } else {
|
| error("invalid group");
|
| }
|
| @@ -2114,14 +2178,18 @@ class MiniExpInterpreter {
|
| }
|
| break;
|
| case BACKTRACK_IF_NO_MATCH:
|
| - if (subject.codeUnitAt(_registers[CURRENT_POSITION]) !=
|
| + case BACKTRACK_IF_PREVIOUS_NO_MATCH:
|
| + int offset = byteCode - BACKTRACK_IF_NO_MATCH;
|
| + if (subject.codeUnitAt(_registers[CURRENT_POSITION] - offset) !=
|
| _constantPool.codeUnitAt(_byteCodes[programCounter++])) {
|
| _registers[CURRENT_POSITION] = stack[--stackPointer];
|
| programCounter = stack[--stackPointer];
|
| }
|
| break;
|
| case BACKTRACK_IF_IN_RANGE:
|
| - int code = subject.codeUnitAt(_registers[CURRENT_POSITION]);
|
| + case BACKTRACK_IF_PREVIOUS_IN_RANGE:
|
| + int offset = byteCode - BACKTRACK_IF_IN_RANGE;
|
| + int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
|
| int from = _byteCodes[programCounter++];
|
| int to = _byteCodes[programCounter++];
|
| if (from <= code && code <= to) {
|
| @@ -2130,13 +2198,17 @@ class MiniExpInterpreter {
|
| }
|
| break;
|
| case GOTO_IF_MATCH:
|
| - int code = subject.codeUnitAt(_registers[CURRENT_POSITION]);
|
| + case GOTO_IF_PREVIOUS_MATCH:
|
| + int offset = byteCode - GOTO_IF_MATCH;
|
| + int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
|
| int expected = _byteCodes[programCounter++];
|
| int dest = _byteCodes[programCounter++];
|
| if (code == expected) programCounter = dest;
|
| break;
|
| case GOTO_IF_IN_RANGE:
|
| - int code = subject.codeUnitAt(_registers[CURRENT_POSITION]);
|
| + case GOTO_IF_PREVIOUS_IN_RANGE:
|
| + int offset = byteCode - GOTO_IF_IN_RANGE;
|
| + int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
|
| int from = _byteCodes[programCounter++];
|
| int to = _byteCodes[programCounter++];
|
| int dest = _byteCodes[programCounter++];
|
| @@ -2188,9 +2260,12 @@ class MiniExpInterpreter {
|
| stackPointer = _registers[STACK_POINTER];
|
| break;
|
| case BACKTRACK_ON_BACK_REFERENCE:
|
| + case BACKTRACK_ON_BACK_REFERENCE_BACKWARDS:
|
| int registerIndex = _byteCodes[programCounter++];
|
| bool case_sensitive = _byteCodes[programCounter++] != 0;
|
| - if (!checkBackReference(subject, case_sensitive, registerIndex)) {
|
| + if (!checkBackReference(
|
| + subject, case_sensitive, registerIndex,
|
| + byteCode == BACKTRACK_ON_BACK_REFERENCE_BACKWARDS)) {
|
| // Backtrack.
|
| _registers[CURRENT_POSITION] = stack[--stackPointer];
|
| programCounter = stack[--stackPointer];
|
| @@ -2212,13 +2287,18 @@ class MiniExpInterpreter {
|
| }
|
|
|
| bool checkBackReference(
|
| - String subject, bool caseSensitive, int registerIndex) {
|
| + String subject, bool caseSensitive, int registerIndex, bool backwards) {
|
| int start = _registers[registerIndex];
|
| int end = _registers[registerIndex + 1];
|
| if (end == NO_POSITION) return true;
|
| int length = end - start;
|
| int currentPosition = _registers[CURRENT_POSITION];
|
| - if (currentPosition + end - start > subject.length) return false;
|
| + if (backwards) {
|
| + if (currentPosition < length) return false;
|
| + currentPosition -= length;
|
| + } else {
|
| + if (currentPosition + length > subject.length) return false;
|
| + }
|
| for (int i = 0; i < length; i++) {
|
| int x = subject.codeUnitAt(start + i);
|
| int y = subject.codeUnitAt(currentPosition + i);
|
| @@ -2228,7 +2308,7 @@ class MiniExpInterpreter {
|
| }
|
| if (x != y) return false;
|
| }
|
| - _registers[CURRENT_POSITION] += length;
|
| + _registers[CURRENT_POSITION] += backwards ? -length : length;
|
| return true;
|
| }
|
| }
|
|
|