Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(819)

Unified Diff: lib/core/regexp.dart

Issue 1398033002: Implement lookbehind in regexp engine Base URL: git@github.com:dart-lang/fletch.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tests/unsorted/lookbehind_test.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/core/regexp.dart
diff --git a/lib/core/regexp.dart b/lib/core/regexp.dart
index 0f4b29404c1c41a0a8d3a164aa0681bb9de309bf..11d10cedc7165524e347f1c838ad644b15dcad0c 100644
--- a/lib/core/regexp.dart
+++ b/lib/core/regexp.dart
@@ -97,18 +97,23 @@ const BACKTRACK_EQ = 4; // reg reg
const BACKTRACK_NE = 5; // reg reg
const BACKTRACK_GT = 6; // reg reg
const BACKTRACK_IF_NO_MATCH = 7; // constant-pool-offset
-const BACKTRACK_IF_IN_RANGE = 8; // from to
-const GOTO_IF_MATCH = 9; // charCode label
-const GOTO_IF_IN_RANGE = 10; // from to label
-const GOTO_EQ = 11; // reg reg label
-const GOTO_GE = 12; // reg reg label
-const GOTO_IF_WORD_CHARACTER = 13; // position-offset label
-const ADD_TO_REGISTER = 14; // reg const
-const COPY_REGISTER = 15; // dest-reg source-reg
-const BACKTRACK_ON_BACK_REFERENCE = 16; // capture-reg
-const BACKTRACK = 17;
-const SUCCEED = 18;
-const FAIL = 19;
+const BACKTRACK_IF_PREVIOUS_NO_MATCH = 8; // constant-pool-offset
+const BACKTRACK_IF_IN_RANGE = 9; // from to
+const BACKTRACK_IF_PREVIOUS_IN_RANGE = 10; // from to
+const GOTO_IF_MATCH = 11; // charCode label
+const GOTO_IF_PREVIOUS_MATCH = 12; // charCode label
+const GOTO_IF_IN_RANGE = 13; // from to label
+const GOTO_IF_PREVIOUS_IN_RANGE = 14; // from to label
+const GOTO_EQ = 15; // reg reg label
+const GOTO_GE = 16; // reg reg label
+const GOTO_IF_WORD_CHARACTER = 17; // position-offset label
+const ADD_TO_REGISTER = 18; // reg const
+const COPY_REGISTER = 19; // dest-reg source-reg
+const BACKTRACK_ON_BACK_REFERENCE = 20; // capture-reg
+const BACKTRACK_ON_BACK_REFERENCE_BACKWARDS = 21; // capture-reg
+const BACKTRACK = 22;
+const SUCCEED = 23;
+const FAIL = 24;
// Format is name, number of register arguments, number of other arguments.
const BYTE_CODE_NAMES = const [
@@ -120,15 +125,20 @@ const BYTE_CODE_NAMES = const [
"BACKTRACK_NE", 2, 0,
"BACKTRACK_GT", 2, 0,
"BACKTRACK_IF_NO_MATCH", 0, 1,
+ "BACKTRACK_IF_PREVIOUS_NO_MATCH", 0, 1,
"BACKTRACK_IF_IN_RANGE", 0, 2,
+ "BACKTRACK_IF_PREVIOUS_IN_RANGE", 0, 2,
"GOTO_IF_MATCH", 0, 2,
+ "GOTO_IF_PREVIOUS_MATCH", 0, 2,
"GOTO_IF_IN_RANGE", 0, 3,
+ "GOTO_IF_PREVIOUS_IN_RANGE", 0, 3,
"GOTO_EQ", 2, 1,
"GOTO_GE", 2, 1,
"GOTO_IF_WORD_CHARACTER", 0, 2,
"ADD_TO_REGISTER", 1, 1,
"COPY_REGISTER", 2, 0,
"BACKTRACK_ON_BACK_REFERENCE", 1, 1,
+ "BACKTRACK_ON_BACK_REFERENCE_BACKWARDS", 1, 1,
"BACKTRACK", 0, 0,
"SUCCEED", 0, 0,
"FAIL", 0, 0];
@@ -150,6 +160,7 @@ const CHAR_CODE_0 = 48;
const CHAR_CODE_7 = 55;
const CHAR_CODE_9 = 57;
const CHAR_CODE_COLON = 58;
+const CHAR_CODE_LESS_THAN = 60;
const CHAR_CODE_EQUALS = 61;
const CHAR_CODE_QUERY = 63;
const CHAR_CODE_UPPER_A = 65;
@@ -200,6 +211,9 @@ class MiniExpCompiler {
final List<int> _extraConstants = new List<int>();
final List<BackReference> _backReferences = new List<BackReference>();
MiniExpLabel _pendingGoto;
+ bool stepBackwards = false;
+ int get oneIfBackwards => stepBackwards ? 1 : 0;
+ int get direction => stepBackwards ? -1 : 1;
MiniExpCompiler(this.pattern, this.caseSensitive) {
for (int i = 0; i < FIXED_REGISTERS; i++) {
@@ -209,6 +223,7 @@ class MiniExpCompiler {
List<int> get codes {
flushPendingGoto();
+ //disassemble(_codes);
return _codes;
}
@@ -379,7 +394,7 @@ class MiniExpCompiler {
}
void backtrackOnBackReferenceFail(int register, bool caseSensitive) {
- _emit(BACKTRACK_ON_BACK_REFERENCE,
+ _emit(BACKTRACK_ON_BACK_REFERENCE + oneIfBackwards,
registerNumber(register), caseSensitive ? 1 : 0);
}
@@ -393,15 +408,15 @@ class MiniExpCompiler {
}
void backtrackIfNoMatch(int constant_pool_offset) {
- _emit(BACKTRACK_IF_NO_MATCH, constant_pool_offset);
+ _emit(BACKTRACK_IF_NO_MATCH + oneIfBackwards, constant_pool_offset);
}
void backtrackIfInRange(int from, int to) {
- _emit(BACKTRACK_IF_IN_RANGE, from, to);
+ _emit(BACKTRACK_IF_IN_RANGE + oneIfBackwards, from, to);
}
void gotoIfMatches(int charCode, MiniExpLabel label) {
- _emit(GOTO_IF_MATCH, charCode);
+ _emit(GOTO_IF_MATCH + oneIfBackwards, charCode);
link(label);
}
@@ -409,7 +424,7 @@ class MiniExpCompiler {
if (from == to) {
gotoIfMatches(from, label);
} else {
- _emit(GOTO_IF_IN_RANGE, from, to);
+ _emit(GOTO_IF_IN_RANGE + oneIfBackwards, from, to);
link(label);
}
}
@@ -510,10 +525,11 @@ class MiniExpAnalysis {
anchored = true,
registersToSave = null;
- MiniExpAnalysis.lookahead(MiniExpAnalysis bodyAnalysis, bool positive)
+ MiniExpAnalysis.lookahead(
+ MiniExpAnalysis bodyAnalysis, bool positive, bool behind)
: canMatchEmpty = true,
fixedLength = 0,
- anchored = positive && bodyAnalysis.anchored,
+ anchored = positive && !behind && bodyAnalysis.anchored,
registersToSave = bodyAnalysis.registersToSave;
MiniExpAnalysis.quantifier(
@@ -601,8 +617,11 @@ class Alternative extends MiniExpAst {
Alternative(this._left, this._right);
void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
- compiler.generate(_left, _right.label);
- compiler.generate(_right, onSuccess);
+ MiniExpAst first = compiler.stepBackwards ? _right : _left;
+ MiniExpAst second = compiler.stepBackwards ? _left : _right;
+
+ compiler.generate(first, second.label);
+ compiler.generate(second, onSuccess);
}
MiniExpAnalysis analyze(MiniExpCompiler compiler) {
@@ -690,13 +709,15 @@ class WordBoundary extends Assertion {
class LookAhead extends Assertion {
final bool _positive;
+ final bool _behind;
final MiniExpAst _body;
List<int> _subtreeRegisters;
int _savedStackPointerRegister;
int _savedPosition;
- LookAhead(this._positive, this._body, MiniExpCompiler compiler) {
+ LookAhead(
+ this._positive, this._behind, this._body, MiniExpCompiler compiler) {
_savedStackPointerRegister = compiler.allocateWorkingRegister();
_savedPosition = compiler.allocateWorkingRegister();
}
@@ -714,7 +735,10 @@ class LookAhead extends Assertion {
if (!_positive) {
compiler.pushBacktrack(succeed_on_failure);
}
+ bool stepBackwards = compiler.stepBackwards;
+ compiler.stepBackwards = _behind;
compiler.generate(_body, body_succeeded);
+ compiler.stepBackwards = stepBackwards;
compiler.bind(body_succeeded);
compiler.copyRegister(STACK_POINTER, _savedStackPointerRegister);
@@ -755,7 +779,7 @@ class LookAhead extends Assertion {
MiniExpAnalysis analyze(compiler) {
MiniExpAnalysis bodyAnalysis = _body.analyze(compiler);
_subtreeRegisters = bodyAnalysis.registersToSave;
- return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive);
+ return new MiniExpAnalysis.lookahead(bodyAnalysis, _positive, _behind);
}
}
@@ -848,7 +872,8 @@ class Quantifier extends MiniExpAst {
// unwound enough.
compiler.copyRegister(_optimizedGreedyRegister, CURRENT_POSITION);
if (_min != 0) {
- compiler.addToRegister(_optimizedGreedyRegister, _min * _bodyLength);
+ int offset = compiler.direction * _bodyLength;
+ compiler.addToRegister(_optimizedGreedyRegister, _min * offset);
}
// This backtrack doesn't trigger until the quantifier has eaten as much as
@@ -871,23 +896,35 @@ class Quantifier extends MiniExpAst {
compiler.bind(cantAdvanceMore);
if (_min != 0) {
- compiler.backtrackIfGreater(
- _optimizedGreedyRegister, _savePositionRegister);
+ if (compiler.stepBackwards) {
+ compiler.backtrackIfGreater(
+ _savePositionRegister, _optimizedGreedyRegister);
+ } else {
+ compiler.backtrackIfGreater(
+ _optimizedGreedyRegister, _savePositionRegister);
+ }
}
- compiler.addToRegister(_savePositionRegister, _bodyLength);
+ compiler.addToRegister(
+ _savePositionRegister, compiler.direction * _bodyLength);
compiler.copyRegister(CURRENT_POSITION, _savePositionRegister);
// The continuation of the regexp failed. We backtrack the greedy
// quantifier by one step and retry.
compiler.bind(continuationFailed);
- compiler.addToRegister(CURRENT_POSITION, -_bodyLength);
+ compiler.addToRegister(
+ CURRENT_POSITION, compiler.direction * -_bodyLength);
// If we got back to where the quantifier started matching, then jump
// to the continuation (we haven't pushed a backtrack, so if that fails, it
// will backtrack further).
// We don't have gotoIfEqual, so use gotoIfGreaterEqual.
- compiler.gotoIfGreaterEqual(
- _optimizedGreedyRegister, CURRENT_POSITION, onSuccess);
+ if (compiler.stepBackwards) {
+ compiler.gotoIfGreaterEqual(
+ CURRENT_POSITION, _optimizedGreedyRegister, onSuccess);
+ } else {
+ compiler.gotoIfGreaterEqual(
+ _optimizedGreedyRegister, CURRENT_POSITION, onSuccess);
+ }
compiler.pushBacktrack(continuationFailed);
compiler.goto(onSuccess);
}
@@ -1002,7 +1039,11 @@ class Atom extends MiniExpAst {
Atom(this._constantIndex);
void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
- compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
+ if (compiler.stepBackwards) {
+ compiler.backtrackIfEqual(CURRENT_POSITION, 0);
+ } else {
+ compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
+ }
MiniExpLabel match;
int charCode = compiler.constantPoolEntry(_constantIndex);
if (!compiler.caseSensitive) {
@@ -1017,7 +1058,7 @@ class Atom extends MiniExpAst {
}
compiler.backtrackIfNoMatch(_constantIndex);
if (match != null) compiler.bind(match);
- compiler.addToRegister(CURRENT_POSITION, 1);
+ compiler.addToRegister(CURRENT_POSITION, compiler.direction);
compiler.goto(onSuccess);
}
@@ -1119,7 +1160,11 @@ class CharClass extends MiniExpAst {
}
void generate(MiniExpCompiler compiler, MiniExpLabel onSuccess) {
- compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
+ if (compiler.stepBackwards) {
+ compiler.backtrackIfEqual(CURRENT_POSITION, 0);
+ } else {
+ compiler.backtrackIfEqual(CURRENT_POSITION, STRING_LENGTH);
+ }
List<int> ranges = _ranges;
if (!compiler.caseSensitive) {
ranges = caseInsensitiveRanges(_ranges);
@@ -1136,7 +1181,7 @@ class CharClass extends MiniExpAst {
compiler.backtrackIfInRange(ranges[i], ranges[i + 1]);
}
}
- compiler.addToRegister(CURRENT_POSITION, 1);
+ compiler.addToRegister(CURRENT_POSITION, compiler.direction);
compiler.goto(onSuccess);
}
@@ -1198,22 +1243,24 @@ class Capture extends MiniExpAst {
MiniExpLabel undoStart = new MiniExpLabel();
MiniExpLabel writeEnd = new MiniExpLabel();
MiniExpLabel undoEnd = new MiniExpLabel();
- compiler.copyRegister(_startRegister, CURRENT_POSITION);
+ int firstRegister = compiler.stepBackwards ? _endRegister : _startRegister;
+ int lastRegister = compiler.stepBackwards ? _startRegister : _endRegister;
+ compiler.copyRegister(firstRegister, CURRENT_POSITION);
compiler.pushBacktrack(undoStart);
compiler.generate(_body, writeEnd);
compiler.bind(writeEnd);
- compiler.copyRegister(_endRegister, CURRENT_POSITION);
+ compiler.copyRegister(lastRegister, CURRENT_POSITION);
compiler.pushBacktrack(undoEnd);
compiler.goto(onSuccess);
compiler.bind(undoStart);
- compiler.copyRegister(_startRegister, NO_POSITION_REGISTER);
+ compiler.copyRegister(firstRegister, NO_POSITION_REGISTER);
compiler.backtrack();
compiler.bind(undoEnd);
- compiler.copyRegister(_endRegister, NO_POSITION_REGISTER);
+ compiler.copyRegister(lastRegister, NO_POSITION_REGISTER);
compiler.backtrack();
}
@@ -1423,6 +1470,8 @@ enum Token {
nonCapturing,
lookAhead,
negativeLookAhead,
+ lookBehind,
+ negativeLookBehind,
other
}
@@ -1500,24 +1549,28 @@ class MiniExpParser {
}
if (acceptToken(Token.wordBoundary)) return new WordBoundary(true);
if (acceptToken(Token.notWordBoundary)) return new WordBoundary(false);
- var lookaheadAst;
+ var lookAst;
if (acceptToken(Token.lookAhead)) {
- lookaheadAst = new LookAhead(true, parseDisjunction(), _compiler);
+ lookAst = new LookAhead(true, false, parseDisjunction(), _compiler);
} else if (acceptToken(Token.negativeLookAhead)) {
- lookaheadAst = new LookAhead(false, parseDisjunction(), _compiler);
+ lookAst = new LookAhead(false, false, parseDisjunction(), _compiler);
+ } else if (acceptToken(Token.lookBehind)) {
+ lookAst = new LookAhead(true, true, parseDisjunction(), _compiler);
+ } else if (acceptToken(Token.negativeLookBehind)) {
+ lookAst = new LookAhead(false, true, parseDisjunction(), _compiler);
}
- if (lookaheadAst != null) {
+ if (lookAst != null) {
expectToken(Token.rParen);
// The normal syntax does not allow a quantifier here, but the web
// compatible one does. Slightly nasty hack for compatibility:
if (peekToken(Token.quant)) {
MiniExpAst quant = new Quantifier(
_minimumRepeats, _maximumRepeats, _lastWasGreedy,
- lookaheadAst, _compiler);
+ lookAst, _compiler);
expectToken(Token.quant);
return quant;
}
- return lookaheadAst;
+ return lookAst;
}
return null;
}
@@ -1936,6 +1989,17 @@ class MiniExpParser {
_lastToken = Token.nonCapturing;
} else if (parenthesisModifier == CHAR_CODE_BANG) {
_lastToken = Token.negativeLookAhead;
+ } else if (parenthesisModifier == CHAR_CODE_LESS_THAN) {
+ if (!_has(_position + 3)) error("unterminated group");
+ int lookBehindModifier = _at(_position + 3);
+ _position++;
+ if (lookBehindModifier == CHAR_CODE_EQUALS) {
+ _lastToken = Token.lookBehind;
+ } else if (lookBehindModifier == CHAR_CODE_BANG) {
+ _lastToken = Token.negativeLookBehind;
+ } else {
+ error("invalid group");
+ }
} else {
error("invalid group");
}
@@ -2114,14 +2178,18 @@ class MiniExpInterpreter {
}
break;
case BACKTRACK_IF_NO_MATCH:
- if (subject.codeUnitAt(_registers[CURRENT_POSITION]) !=
+ case BACKTRACK_IF_PREVIOUS_NO_MATCH:
+ int offset = byteCode - BACKTRACK_IF_NO_MATCH;
+ if (subject.codeUnitAt(_registers[CURRENT_POSITION] - offset) !=
_constantPool.codeUnitAt(_byteCodes[programCounter++])) {
_registers[CURRENT_POSITION] = stack[--stackPointer];
programCounter = stack[--stackPointer];
}
break;
case BACKTRACK_IF_IN_RANGE:
- int code = subject.codeUnitAt(_registers[CURRENT_POSITION]);
+ case BACKTRACK_IF_PREVIOUS_IN_RANGE:
+ int offset = byteCode - BACKTRACK_IF_IN_RANGE;
+ int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
int from = _byteCodes[programCounter++];
int to = _byteCodes[programCounter++];
if (from <= code && code <= to) {
@@ -2130,13 +2198,17 @@ class MiniExpInterpreter {
}
break;
case GOTO_IF_MATCH:
- int code = subject.codeUnitAt(_registers[CURRENT_POSITION]);
+ case GOTO_IF_PREVIOUS_MATCH:
+ int offset = byteCode - GOTO_IF_MATCH;
+ int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
int expected = _byteCodes[programCounter++];
int dest = _byteCodes[programCounter++];
if (code == expected) programCounter = dest;
break;
case GOTO_IF_IN_RANGE:
- int code = subject.codeUnitAt(_registers[CURRENT_POSITION]);
+ case GOTO_IF_PREVIOUS_IN_RANGE:
+ int offset = byteCode - GOTO_IF_IN_RANGE;
+ int code = subject.codeUnitAt(_registers[CURRENT_POSITION] - offset);
int from = _byteCodes[programCounter++];
int to = _byteCodes[programCounter++];
int dest = _byteCodes[programCounter++];
@@ -2188,9 +2260,12 @@ class MiniExpInterpreter {
stackPointer = _registers[STACK_POINTER];
break;
case BACKTRACK_ON_BACK_REFERENCE:
+ case BACKTRACK_ON_BACK_REFERENCE_BACKWARDS:
int registerIndex = _byteCodes[programCounter++];
bool case_sensitive = _byteCodes[programCounter++] != 0;
- if (!checkBackReference(subject, case_sensitive, registerIndex)) {
+ if (!checkBackReference(
+ subject, case_sensitive, registerIndex,
+ byteCode == BACKTRACK_ON_BACK_REFERENCE_BACKWARDS)) {
// Backtrack.
_registers[CURRENT_POSITION] = stack[--stackPointer];
programCounter = stack[--stackPointer];
@@ -2212,13 +2287,18 @@ class MiniExpInterpreter {
}
bool checkBackReference(
- String subject, bool caseSensitive, int registerIndex) {
+ String subject, bool caseSensitive, int registerIndex, bool backwards) {
int start = _registers[registerIndex];
int end = _registers[registerIndex + 1];
if (end == NO_POSITION) return true;
int length = end - start;
int currentPosition = _registers[CURRENT_POSITION];
- if (currentPosition + end - start > subject.length) return false;
+ if (backwards) {
+ if (currentPosition < length) return false;
+ currentPosition -= length;
+ } else {
+ if (currentPosition + length > subject.length) return false;
+ }
for (int i = 0; i < length; i++) {
int x = subject.codeUnitAt(start + i);
int y = subject.codeUnitAt(currentPosition + i);
@@ -2228,7 +2308,7 @@ class MiniExpInterpreter {
}
if (x != y) return false;
}
- _registers[CURRENT_POSITION] += length;
+ _registers[CURRENT_POSITION] += backwards ? -length : length;
return true;
}
}
« no previous file with comments | « no previous file | tests/unsorted/lookbehind_test.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698