Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1139)

Unified Diff: src/interpreter-re2k.cc

Issue 10830: * We want to be able to find atoms and character classes without advancing th... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/regexp2000/
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/interpreter-re2k.cc
===================================================================
--- src/interpreter-re2k.cc (revision 738)
+++ src/interpreter-re2k.cc (working copy)
@@ -52,9 +52,8 @@
-template <typename Char>
static bool RawMatch(const byte* code_base,
- Vector<const Char> subject,
+ Vector<const uc16> subject,
int* registers,
int current) {
const byte* pc = code_base;
@@ -93,6 +92,9 @@
BYTECODE(SET_REGISTER)
registers[pc[1]] = Load32(pc + 2);
pc += 6;
+ BYTECODE(ADVANCE_REGISTER)
+ registers[pc[1]] += Load32(pc + 2);
+ pc += 6;
BYTECODE(SET_REGISTER_TO_CP)
registers[pc[1]] = current + Load32(pc + 2);
pc += 6;
@@ -112,11 +114,6 @@
pc += 2;
BYTECODE(FAIL)
return false;
- BYTECODE(FAIL_IF_WITHIN)
- if (current + Load32(pc + 1) >= subject.length()) {
- return false;
- }
- pc += 5;
BYTECODE(SUCCEED)
return true;
BYTECODE(ADVANCE_CP)
@@ -127,11 +124,11 @@
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + Load32(pc + 1);
if (pos >= subject.length()) {
- current_char = -1;
+ pc = code_base + Load32(pc + 5);
} else {
current_char = subject[pos];
+ pc += 9;
}
- pc += 5;
}
BYTECODE(CHECK_CHAR) {
int c = Load16(pc + 1);
@@ -149,24 +146,10 @@
pc += 7;
}
}
- BYTECODE(CHECK_END) {
- if (current_char != -1) {
- pc = code_base + Load32(pc + 1);
- } else {
- pc += 5;
- }
- }
- BYTECODE(CHECK_NOT_END) {
- if (current_char == -1) {
- pc = code_base + Load32(pc + 1);
- } else {
- pc += 5;
- }
- }
BYTECODE(CHECK_RANGE) {
int start = Load16(pc + 1);
int end = Load16(pc + 3);
- if (current_char >= start && current_char <= end) {
+ if (current_char < start || current_char > end) {
pc = code_base + Load32(pc + 5);
} else {
pc += 9;
@@ -175,7 +158,7 @@
BYTECODE(CHECK_NOT_RANGE) {
int start = Load16(pc + 1);
int end = Load16(pc + 3);
- if (current_char < start || current_char > end || current_char == -1) {
+ if (current_char >= start && current_char <= end) {
pc = code_base + Load32(pc + 5);
} else {
pc += 9;
@@ -193,14 +176,59 @@
} else {
pc += 8;
}
+ BYTECODE(LOOKUP_MAP1) {
+ // Look up character in a bitmap. If we find a 0, then jump to the
+ // location at pc + 7. Otherwise fall through!
+ int index = current_char - Load16(pc + 1);
+ byte map = code_base[Load32(pc + 3) + (index >> 3)];
+ map = ((map >> (index & 7)) & 1);
+ if (map == 0) {
+ pc = code_base + Load32(pc + 7);
+ } else {
+ pc += 11;
+ }
+ }
+ BYTECODE(LOOKUP_MAP2) {
+ // Look up character in a half-nibble map. If we find 00, then jump to
+ // the location at pc + 7. If we find 01 then jump to location at
+ // pc + 11, etc.
+ int index = (current_char - Load16(pc + 1)) << 1;
+ byte map = code_base[Load32(pc + 3) + (index >> 3)];
+ map = ((map >> (index & 7)) & 3);
+ if (map < 2) {
+ if (map == 0) {
+ pc = code_base + Load32(pc + 7);
+ } else {
+ pc = code_base + Load32(pc + 11);
+ }
+ } else {
+ if (map == 2) {
+ pc = code_base + Load32(pc + 15);
+ } else {
+ pc = code_base + Load32(pc + 19);
+ }
+ }
+ }
+ BYTECODE(LOOKUP_MAP8) {
+ // Look up character in a byte map. Use the byte as an index into a
+ // table that follows this instruction immediately.
+ int index = current_char - Load16(pc + 1);
+ byte map = code_base[Load32(pc + 3) + index];
+ const byte* new_pc = code_base + Load32(pc + 7) + (map << 2);
+ pc = code_base + Load32(new_pc);
+ }
+ BYTECODE(LOOKUP_HI_MAP8) {
+ // Look up high byte of this character in a byte map. Use the byte as
+ // an index into a table that follows this instruction immediately.
+ int index = (current_char >> 8) - pc[1];
+ byte map = code_base[Load32(pc + 2) + index];
+ const byte* new_pc = code_base + Load32(pc + 6) + (map << 2);
+ pc = code_base + Load32(new_pc);
+ }
BYTECODE(CHECK_BACKREF)
UNREACHABLE();
BYTECODE(CHECK_NOT_BACKREF)
UNREACHABLE();
- BYTECODE(CHECK_BITMAP)
- UNREACHABLE();
- BYTECODE(CHECK_NOT_BITMAP)
- UNREACHABLE();
break; // Last one doesn't have break in macro.
default:
UNREACHABLE();
@@ -210,24 +238,18 @@
}
-bool Re2kInterpreter::Match(ByteArray* code_array,
- String* subject,
+bool Re2kInterpreter::Match(Handle<ByteArray> code_array,
+ Handle<String> subject,
int* registers,
int start_position) {
const byte* code_base = code_array->GetDataStartAddress();
- StringShape shape(subject);
- ASSERT(subject->IsFlat(shape));
- if (shape.IsAsciiRepresentation()) {
- return RawMatch(code_base,
- subject->ToAsciiVector(),
- registers,
- start_position);
- } else {
- return RawMatch(code_base,
- subject->ToUC16Vector(),
- registers,
- start_position);
- }
+ ASSERT(subject->IsFlat(StringShape(*subject)));
+ Handle<String> flat_two_byte = RegExpImpl::CachedStringToTwoByte(subject);
+ ASSERT(StringShape(*flat_two_byte).IsTwoByteRepresentation());
+ return RawMatch(code_base,
+ flat_two_byte->ToUC16Vector(),
+ registers,
+ start_position);
}
} } // namespace v8::internal
« no previous file with comments | « src/interpreter-re2k.h ('k') | src/jsregexp.h » ('j') | src/regexp-macro-assembler.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698