Index: lib/core/regexp.dart |
diff --git a/lib/core/regexp.dart b/lib/core/regexp.dart |
index 0f4b29404c1c41a0a8d3a164aa0681bb9de309bf..11d10cedc7165524e347f1c838ad644b15dcad0c 100644 |
--- a/lib/core/regexp.dart |
+++ b/lib/core/regexp.dart |
@@ -97,18 +97,23 @@ const BACKTRACK_EQ = 4; // reg reg |
const BACKTRACK_NE = 5; // reg reg |
const BACKTRACK_GT = 6; // reg reg |
const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset |
-const BACKTRACK_IF_IN_RANGE = 8; // from to |
-const GOTO_IF_MATCH = 9; // charCode label |
-const GOTO_IF_IN_RANGE = 10; // from to label |
-const GOTO_EQ = 11; // reg reg label |
-const GOTO_GE = 12; // reg reg label |
-const GOTO_IF_WORD_CHARACTER = 13; // position-offset label |
-const ADD_TO_REGISTER = 14; // reg const |
-const COPY_REGISTER = 15; // dest-reg source-reg |
-const BACKTRACK_ON_BACK_REFERENCE = 16; // capture-reg |
-const BACKTRACK = 17; |
-const SUCCEED = 18; |
-const FAIL = 19; |
+const BACKTRACK_IF_PREVIOUS_NO_MATCH = 8; // constant-pool-offset |
+const BACKTRACK_IF_IN_RANGE = 9; // from to |
+const BACKTRACK_IF_PREVIOUS_IN_RANGE = 10; // from to |
+const GOTO_IF_MATCH = 11; // charCode label |
+const GOTO_IF_PREVIOUS_MATCH = 12; // charCode label |
+const GOTO_IF_IN_RANGE = 13; // from to label |
+const GOTO_IF_PREVIOUS_IN_RANGE = 14; // from to label |
+const GOTO_EQ = 15; // reg reg label |
+const GOTO_GE = 16; // reg reg label |
+const GOTO_IF_WORD_CHARACTER = 17; // position-offset label |
+const ADD_TO_REGISTER = 18; // reg const |
+const COPY_REGISTER = 19; // dest-reg source-reg |
+const BACKTRACK_ON_BACK_REFERENCE = 20; // capture-reg |
+const BACKTRACK_ON_BACK_REFERENCE_BACKWARDS = 21; // capture-reg |
+const BACKTRACK = 22; |
+const SUCCEED = 23; |
+const FAIL = 24; |
// Format is name, number of register arguments, number of other arguments. |
const BYTE_CODE_NAMES = const [ |
@@ -120,15 +125,20 @@ const BYTE_CODE_NAMES = const [ |
"BACKTRACK_NE", 2, 0, |
"BACKTRACK_GT", 2, 0, |
"BACKTRACK_IF_NO_MATCH", 0, 1, |
+ "BACKTRACK_IF_PREVIOUS_NO_MATCH", 0, 1, |
"BACKTRACK_IF_IN_RANGE", 0, 2, |
+ "BACKTRACK_IF_PREVIOUS_IN_RANGE", 0, 2, |
"GOTO_IF_MATCH", 0, 2, |
+ "GOTO_IF_PREVIOUS_MATCH", 0, 2, |
"GOTO_IF_IN_RANGE", 0, 3, |
+ "GOTO_IF_PREVIOUS_IN_RANGE", 0, 3, |
"GOTO_EQ", 2, 1, |
"GOTO_GE", 2, 1, |
"GOTO_IF_WORD_CHARACTER", 0, 2, |
"ADD_TO_REGISTER", 1, 1, |
"COPY_REGISTER", 2, 0, |
"BACKTRACK_ON_BACK_REFERENCE", 1, 1, |
+ "BACKTRACK_ON_BACK_REFERENCE_BACKWARDS", 1, 1, |
"BACKTRACK", 0, 0, |
"SUCCEED", 0, 0, |
"FAIL", 0, 0]; |
@@ -150,6 +160,7 @@ const CHAR_CODE_0 = 48; |
const CHAR_CODE_7 = 55; |
const CHAR_CODE_9 = 57; |
const CHAR_CODE_COLON = 58; |
+const CHAR_CODE_LESS_THAN = 60; |
const CHAR_CODE_EQUALS = 61; |
const CHAR_CODE_QUERY = 63; |
const CHAR_CODE_UPPER_A = 65; |
@@ -200,6 +211,9 @@ class MiniExpCompiler { |
final List<int> _extraConstants = new List<int>(); |
final List<BackReference> _backReferences = new List<BackReference>(); |
MiniExpLabel _pendingGoto; |
+ bool stepBackwards = false; |
+ int get oneIfBackwards => stepBackwards ? 1 : 0; |
+ int get direction => stepBackwards ? -1 : 1; |
MiniExpCompiler(this.pattern, this.caseSensitive) { |
for (int i = 0; i < FIXED_REGISTERS; i++) { |
@@ -209,6 +223,7 @@ class MiniExpCompiler { |
List<int> get codes { |
flushPendingGoto(); |
+ //disassemble(_codes); |
return _codes; |
} |
@@ -379,7 +394,7 @@ class MiniExpCompiler { |
} |
void backtrackOnBackReferenceFail(int register, bool caseSensitive) { |
- _emit(BACKTRACK_ON_BACK_REFERENCE, |
+ _emit(BACKTRACK_ON_BACK_REFERENCE + oneIfBackwards, |
registerNumber(register), caseSensitive ? 1 : 0); |
} |
@@ -393,15 +408,15 @@ class MiniExpCompiler { |
} |
void backtrackIfNoMatch(int constant_pool_offset) { |
- _emit(BACKTRACK_IF_NO_MATCH, constant_pool_offset); |
+ _emit(BACKTRACK_IF_NO_MATCH + oneIfBackwards, constant_pool_offset); |
} |
void backtrackIfInRange(int from, int to) { |
- _emit(BACKTRACK_IF_IN_RANGE, from, to); |
+ _emit(BACKTRACK_IF_IN_RANGE + oneIfBackwards, from, to); |
} |
void gotoIfMatches(int charCode, MiniExpLabel label) { |
- _emit(GOTO_IF_MATCH, charCode); |
+ _emit(GOTO_IF_MATCH + oneIfBackwards, charCode); |
link(label); |
} |
@@ -409,7 +424,7 @@ class MiniExpCompiler { |
if (from == to) { |
gotoIfMatches(from, label); |
} else { |
- _emit(GOTO_IF_IN_RANGE, from, to); |
+ _emit(GOTO_IF_IN_RANGE + oneIfBackwards, from, to); |
link(label); |
} |
} |
@@ -510,10 +525,11 @@ class MiniExpAnalysis { |
anchored = true, |
registersToSave = null; |
- MiniExpAnalysis.lookahead(MiniExpAnalysis bodyAnalysis, bool positive) |
+ MiniExpAnalysis.lookahead( |
+ MiniExpAnalysis bodyAnalysis, bool positive, bool behind) |
: canMatchEmpty = true, |
fixedLength = 0, |
- anchored = positive && bodyAnalysis.anchored, |
+ anchored = positive && !behind && bodyAnalysis.anchored, |
registersToSave = bodyAnalysis.registersToSave; |
MiniExpAnalysis.quantifier( |
@@ -601,8 +617,11 @@ class Alternative extends MiniExpAst { |
Alternative(this._left, this._right); |
void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
- compiler.generate(_left, _right.label); |
- compiler.generate(_right, onSuccess); |
+ MiniExpAst first = compiler.stepBackwards ? _right : _left; |
+ MiniExpAst second = compiler.stepBackwards ? _left : _right; |
+ |
+ compiler.generate(first, second.label); |
+ compiler.generate(second, onSuccess); |
} |
MiniExpAnalysis analyze(MiniExpCompiler compiler) { |
@@ -690,13 +709,15 @@ class WordBoundary extends Assertion { |
class LookAhead extends Assertion { |
final bool _positive; |
+ final bool _behind; |
final MiniExpAst _body; |
List<int> _subtreeRegisters; |
int _savedStackPointerRegister; |
int _savedPosition; |
- LookAhead(this._positive, this._body, MiniExpCompiler compiler) { |
+ LookAhead( |
+ this._positive, this._behind, this._body, MiniExpCompiler compiler) { |
_savedStackPointerRegister = compiler.allocateWorkingRegister(); |
_savedPosition = compiler.allocateWorkingRegister(); |
} |
@@ -714,7 +735,10 @@ class LookAhead extends Assertion { |
if (!_positive) { |
compiler.pushBacktrack(succeed_on_failure); |
} |
+ bool stepBackwards = compiler.stepBackwards; |
+ compiler.stepBackwards = _behind; |
compiler.generate(_body, body_succeeded); |
+ compiler.stepBackwards = stepBackwards; |
compiler.bind(body_succeeded); |
compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister); |
@@ -755,7 +779,7 @@ class LookAhead extends Assertion { |
MiniExpAnalysis analyze(compiler) { |
MiniExpAnalysis bodyAnalysis = _body.analyze(compiler); |
_subtreeRegisters = bodyAnalysis.registersToSave; |
- return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive); |
+ return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive, _behind); |
} |
} |
@@ -848,7 +872,8 @@ class Quantifier extends MiniExpAst { |
// unwound enough. |
compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION); |
if (_min != 0) { |
- compiler.addToRegister(_optimizedGreedyRegister, _min * _bodyLength); |
+ int offset = compiler.direction * _bodyLength; |
+ compiler.addToRegister(_optimizedGreedyRegister, _min * offset); |
} |
// This backtrack doesn't trigger until the quantifier has eaten as much as |
@@ -871,23 +896,35 @@ class Quantifier extends MiniExpAst { |
compiler.bind(cantAdvanceMore); |
if (_min != 0) { |
- compiler.backtrackIfGreater( |
- _optimizedGreedyRegister, _savePositionRegister); |
+ if (compiler.stepBackwards) { |
+ compiler.backtrackIfGreater( |
+ _savePositionRegister, _optimizedGreedyRegister); |
+ } else { |
+ compiler.backtrackIfGreater( |
+ _optimizedGreedyRegister, _savePositionRegister); |
+ } |
} |
- compiler.addToRegister(_savePositionRegister, _bodyLength); |
+ compiler.addToRegister( |
+ _savePositionRegister, compiler.direction * _bodyLength); |
compiler.copyRegister(CURRENT_POSITION, _savePositionRegister); |
// The continuation of the regexp failed. We backtrack the greedy |
// quantifier by one step and retry. |
compiler.bind(continuationFailed); |
- compiler.addToRegister(CURRENT_POSITION, -_bodyLength); |
+ compiler.addToRegister( |
+ CURRENT_POSITION, compiler.direction * -_bodyLength); |
// If we got back to where the quantifier started matching, then jump |
// to the continuation (we haven't pushed a backtrack, so if that fails, it |
// will backtrack further). |
// We don't have gotoIfEqual, so use gotoIfGreaterEqual. |
- compiler.gotoIfGreaterEqual( |
- _optimizedGreedyRegister, CURRENT_POSITION, onSuccess); |
+ if (compiler.stepBackwards) { |
+ compiler.gotoIfGreaterEqual( |
+ CURRENT_POSITION, _optimizedGreedyRegister, onSuccess); |
+ } else { |
+ compiler.gotoIfGreaterEqual( |
+ _optimizedGreedyRegister, CURRENT_POSITION, onSuccess); |
+ } |
compiler.pushBacktrack(continuationFailed); |
compiler.goto(onSuccess); |
} |
@@ -1002,7 +1039,11 @@ class Atom extends MiniExpAst { |
Atom(this._constantIndex); |
void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
- compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
+ if (compiler.stepBackwards) { |
+ compiler.backtrackIfEqual(CURRENT_POSITION, 0); |
+ } else { |
+ compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
+ } |
MiniExpLabel match; |
int charCode = compiler.constantPoolEntry(_constantIndex); |
if (!compiler.caseSensitive) { |
@@ -1017,7 +1058,7 @@ class Atom extends MiniExpAst { |
} |
compiler.backtrackIfNoMatch(_constantIndex); |
if (match != null) compiler.bind(match); |
- compiler.addToRegister(CURRENT_POSITION, 1); |
+ compiler.addToRegister(CURRENT_POSITION, compiler.direction); |
compiler.goto(onSuccess); |
} |
@@ -1119,7 +1160,11 @@ class CharClass extends MiniExpAst { |
} |
void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) { |
- compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
+ if (compiler.stepBackwards) { |
+ compiler.backtrackIfEqual(CURRENT_POSITION, 0); |
+ } else { |
+ compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH); |
+ } |
List<int> ranges = _ranges; |
if (!compiler.caseSensitive) { |
ranges = caseInsensitiveRanges(_ranges); |
@@ -1136,7 +1181,7 @@ class CharClass extends MiniExpAst { |
compiler.backtrackIfInRange(ranges[i], ranges[i + 1]); |
} |
} |
- compiler.addToRegister(CURRENT_POSITION, 1); |
+ compiler.addToRegister(CURRENT_POSITION, compiler.direction); |
compiler.goto(onSuccess); |
} |
@@ -1198,22 +1243,24 @@ class Capture extends MiniExpAst { |
MiniExpLabel undoStart = new MiniExpLabel(); |
MiniExpLabel writeEnd = new MiniExpLabel(); |
MiniExpLabel undoEnd = new MiniExpLabel(); |
- compiler.copyRegister(_startRegister, CURRENT_POSITION); |
+ int firstRegister = compiler.stepBackwards ? _endRegister : _startRegister; |
+ int lastRegister = compiler.stepBackwards ? _startRegister : _endRegister; |
+ compiler.copyRegister(firstRegister, CURRENT_POSITION); |
compiler.pushBacktrack(undoStart); |
compiler.generate(_body, writeEnd); |
compiler.bind(writeEnd); |
- compiler.copyRegister(_endRegister, CURRENT_POSITION); |
+ compiler.copyRegister(lastRegister, CURRENT_POSITION); |
compiler.pushBacktrack(undoEnd); |
compiler.goto(onSuccess); |
compiler.bind(undoStart); |
- compiler.copyRegister(_startRegister, NO_POSITION_REGISTER); |
+ compiler.copyRegister(firstRegister, NO_POSITION_REGISTER); |
compiler.backtrack(); |
compiler.bind(undoEnd); |
- compiler.copyRegister(_endRegister, NO_POSITION_REGISTER); |
+ compiler.copyRegister(lastRegister, NO_POSITION_REGISTER); |
compiler.backtrack(); |
} |
@@ -1423,6 +1470,8 @@ enum Token { |
nonCapturing, |
lookAhead, |
negativeLookAhead, |
+ lookBehind, |
+ negativeLookBehind, |
other |
} |
@@ -1500,24 +1549,28 @@ class MiniExpParser { |
} |
if (acceptToken(Token.wordBoundary)) return new WordBoundary(true); |
if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false); |
- var lookaheadAst; |
+ var lookAst; |
if (acceptToken(Token.lookAhead)) { |
- lookaheadAst = new LookAhead(true, parseDisjunction(), _compiler); |
+ lookAst = new LookAhead(true, false, parseDisjunction(), _compiler); |
} else if (acceptToken(Token.negativeLookAhead)) { |
- lookaheadAst = new LookAhead(false, parseDisjunction(), _compiler); |
+ lookAst = new LookAhead(false, false, parseDisjunction(), _compiler); |
+ } else if (acceptToken(Token.lookBehind)) { |
+ lookAst = new LookAhead(true, true, parseDisjunction(), _compiler); |
+ } else if (acceptToken(Token.negativeLookBehind)) { |
+ lookAst = new LookAhead(false, true, parseDisjunction(), _compiler); |
} |
- if (lookaheadAst != null) { |
+ if (lookAst != null) { |
expectToken(Token.rParen); |
// The normal syntax does not allow a quantifier here, but the web |
// compatible one does. Slightly nasty hack for compatibility: |
if (peekToken(Token.quant)) { |
MiniExpAst quant = new Quantifier( |
_minimumRepeats, _maximumRepeats, _lastWasGreedy, |
- lookaheadAst, _compiler); |
+ lookAst, _compiler); |
expectToken(Token.quant); |
return quant; |
} |
- return lookaheadAst; |
+ return lookAst; |
} |
return null; |
} |
@@ -1936,6 +1989,17 @@ class MiniExpParser { |
_lastToken = Token.nonCapturing; |
} else if (parenthesisModifier == CHAR_CODE_BANG) { |
_lastToken = Token.negativeLookAhead; |
+ } else if (parenthesisModifier == CHAR_CODE_LESS_THAN) { |
+ if (!_has(_position + 3)) error("unterminated group"); |
+ int lookBehindModifier = _at(_position + 3); |
+ _position++; |
+ if (lookBehindModifier == CHAR_CODE_EQUALS) { |
+ _lastToken = Token.lookBehind; |
+ } else if (lookBehindModifier == CHAR_CODE_BANG) { |
+ _lastToken = Token.negativeLookBehind; |
+ } else { |
+ error("invalid group"); |
+ } |
} else { |
error("invalid group"); |
} |
@@ -2114,14 +2178,18 @@ class MiniExpInterpreter { |
} |
break; |
case BACKTRACK_IF_NO_MATCH: |
- if (subject.codeUnitAt(_registers[CURRENT_POSITION]) != |
+ case BACKTRACK_IF_PREVIOUS_NO_MATCH: |
+ int offset = byteCode - BACKTRACK_IF_NO_MATCH; |
+ if (subject.codeUnitAt(_registers[CURRENT_POSITION] - offset) != |
_constantPool.codeUnitAt(_byteCodes[programCounter++])) { |
_registers[CURRENT_POSITION] = stack[--stackPointer]; |
programCounter = stack[--stackPointer]; |
} |
break; |
case BACKTRACK_IF_IN_RANGE: |
- int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); |
+ case BACKTRACK_IF_PREVIOUS_IN_RANGE: |
+ int offset = byteCode - BACKTRACK_IF_IN_RANGE; |
+ int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
int from = _byteCodes[programCounter++]; |
int to = _byteCodes[programCounter++]; |
if (from <= code && code <= to) { |
@@ -2130,13 +2198,17 @@ class MiniExpInterpreter { |
} |
break; |
case GOTO_IF_MATCH: |
- int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); |
+ case GOTO_IF_PREVIOUS_MATCH: |
+ int offset = byteCode - GOTO_IF_MATCH; |
+ int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
int expected = _byteCodes[programCounter++]; |
int dest = _byteCodes[programCounter++]; |
if (code == expected) programCounter = dest; |
break; |
case GOTO_IF_IN_RANGE: |
- int code = subject.codeUnitAt(_registers[CURRENT_POSITION]); |
+ case GOTO_IF_PREVIOUS_IN_RANGE: |
+ int offset = byteCode - GOTO_IF_IN_RANGE; |
+ int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset); |
int from = _byteCodes[programCounter++]; |
int to = _byteCodes[programCounter++]; |
int dest = _byteCodes[programCounter++]; |
@@ -2188,9 +2260,12 @@ class MiniExpInterpreter { |
stackPointer = _registers[STACK_POINTER]; |
break; |
case BACKTRACK_ON_BACK_REFERENCE: |
+ case BACKTRACK_ON_BACK_REFERENCE_BACKWARDS: |
int registerIndex = _byteCodes[programCounter++]; |
bool case_sensitive = _byteCodes[programCounter++] != 0; |
- if (!checkBackReference(subject, case_sensitive, registerIndex)) { |
+ if (!checkBackReference( |
+ subject, case_sensitive, registerIndex, |
+ byteCode == BACKTRACK_ON_BACK_REFERENCE_BACKWARDS)) { |
// Backtrack. |
_registers[CURRENT_POSITION] = stack[--stackPointer]; |
programCounter = stack[--stackPointer]; |
@@ -2212,13 +2287,18 @@ class MiniExpInterpreter { |
} |
bool checkBackReference( |
- String subject, bool caseSensitive, int registerIndex) { |
+ String subject, bool caseSensitive, int registerIndex, bool backwards) { |
int start = _registers[registerIndex]; |
int end = _registers[registerIndex + 1]; |
if (end == NO_POSITION) return true; |
int length = end - start; |
int currentPosition = _registers[CURRENT_POSITION]; |
- if (currentPosition + end - start > subject.length) return false; |
+ if (backwards) { |
+ if (currentPosition < length) return false; |
+ currentPosition -= length; |
+ } else { |
+ if (currentPosition + length > subject.length) return false; |
+ } |
for (int i = 0; i < length; i++) { |
int x = subject.codeUnitAt(start + i); |
int y = subject.codeUnitAt(currentPosition + i); |
@@ -2228,7 +2308,7 @@ class MiniExpInterpreter { |
} |
if (x != y) return false; |
} |
- _registers[CURRENT_POSITION] += length; |
+ _registers[CURRENT_POSITION] += backwards ? -length : length; |
return true; |
} |
} |