Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Unified Diff: src/interpreter-re2k.cc

Issue 9415: Add bytecodes and an interpreter for executing regular expressions. Very... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/regexp2000/
Patch Set: '' Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/interpreter-re2k.h ('k') | src/jsregexp.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/interpreter-re2k.cc
===================================================================
--- src/interpreter-re2k.cc (revision 0)
+++ src/interpreter-re2k.cc (revision 0)
@@ -0,0 +1,245 @@
+// Copyright 2008 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// A simple interpreter for the Regexp2000 byte code.
+
+
+#include "v8.h"
+#include "ast.h"
+#include "bytecodes-re2k.h"
+#include "interpreter-re2k.h"
+
+
+namespace v8 { namespace internal {
+
+
+static inline int Load32(const byte* pc) {
+#ifdef CAN_READ_UNALIGNED
+ return *reinterpret_cast<const int*>(pc);
+#else
+ union {
+ int word;
+ byte bytes[4];
+ } u;
+ u.bytes[0] = pc[0];
+ u.bytes[1] = pc[1];
+ u.bytes[2] = pc[2];
+ u.bytes[3] = pc[3];
+ return u.word;
+#endif
+}
+
+
+static inline int Load16(const byte* pc) {
+#ifdef CAN_READ_UNALIGNED
+ return *reinterpret_cast<const uc16*>(pc);
+#else
+ union {
+ uc16 word;
+ byte bytes[2];
+ } u;
+ u.bytes[0] = pc[0];
+ u.bytes[1] = pc[1];
+ return u.word;
+#endif
+}
+
+
+template <typename Char>
+static bool raw_match(const byte* code_base, Vector<const Char>subject, int* captures, int current) {
+ printf("raw match\n");
+ const byte* pc = code_base;
+ int backtrack_stack[1000];
+ int backtrack_stack_space = 1000;
+ int* backtrack_sp = backtrack_stack;
+ int current_char = -1;
+ while (true) {
+ switch (*pc) {
+ case BC_PUSH_CP:
+ if (--backtrack_stack_space < 0) {
+ return false; // No match on backtrack stack overflow.
+ }
+ *backtrack_sp++ = current + pc[1];
+ pc += 2;
+ break;
+ case BC_PUSH_CP_WIDE:
+ if (--backtrack_stack_space < 0) {
+ return false; // No match on backtrack stack overflow.
+ }
+ *backtrack_sp++ = current + Load32(pc + 1);
+ pc += 5;
+ break;
+ case BC_PUSH_BT:
+ if (--backtrack_stack_space < 0) {
+ return false; // No match on backtrack stack overflow.
+ }
+ *backtrack_sp++ = Load32(pc + 1);
+ pc += 5;
+ break;
+ case BC_PUSH_CAPTURE:
+ if (--backtrack_stack_space < 0) {
+ return false; // No match on backtrack stack overflow.
+ }
+ *backtrack_sp++ = captures[pc[1]];
+ pc += 2;
+ break;
+ case BC_SET_CAPTURE:
+ captures[pc[1]] = current + pc[2];
+ pc += 3;
+ break;
+ case BC_SET_CAPTURE_WIDE:
+ captures[pc[1]] = current + Load32(pc + 2);
+ pc += 6;
+ break;
+ case BC_POP_CP:
+ backtrack_stack_space++;
+ --backtrack_sp;
+ current = *backtrack_sp;
+ pc += 1;
+ break;
+ case BC_POP_BT:
+ backtrack_stack_space++;
+ --backtrack_sp;
+ pc = code_base + *backtrack_sp;
+ break;
+ case BC_POP_CAPTURE:
+ backtrack_stack_space++;
+ --backtrack_sp;
+ captures[pc[1]] = *backtrack_sp;
+ pc += 2;
+ break;
+ case BC_FAIL:
+ return false;
+ case BC_FAIL_IF_WITHIN:
+ if (current + pc[1] >= subject.length()) {
+ return false;
+ }
+ pc += 2;
+ break;
+ case BC_FAIL_IF_WITHIN_WIDE:
+ if (current + Load32(pc + 1) >= subject.length()) {
+ return false;
+ }
+ pc += 5;
+ break;
+ case BC_SUCCEED:
+ return true;
+ case BC_ADVANCE_CP:
+ current += pc[1];
+ pc += 2;
+ break;
+ case BC_ADVANCE_CP_WIDE:
+ current += Load32(pc + 1);
+ pc += 5;
+ break;
+ case BC_GOTO:
+ pc = code_base + Load32(pc + 1);
+ break;
+ case BC_LOAD_CURRENT_CHAR: {
+ int pos = current + pc[1];
+ if (pos >= subject.length()) {
+ current_char = -1;
+ } else {
+ current_char = subject[pos];
+ }
+ pc += 2;
+ break;
+ }
+ case BC_LOAD_CURRENT_CHAR_WIDE: {
+ int pos = current + Load32(pc + 1);
+ if (pos >= subject.length()) {
+ current_char = -1;
+ } else {
+ current_char = subject[pos];
+ }
+ pc += 5;
+ break;
+ }
+ case BC_CHECK_CHAR: {
+ int c = Load16(pc + 1);
+ if (c != current_char) {
+ pc = code_base + Load32(pc + 3);
+ } else {
+ pc += 7;
+ }
+ break;
+ }
+ case BC_CHECK_NOT_CHAR: {
+ int c = Load16(pc + 1);
+ if (c == current_char || current_char == -1) {
+ pc = code_base + Load32(pc + 3);
+ } else {
+ pc += 7;
+ }
+ break;
+ }
+ case BC_CHECK_RANGE: {
+ int start = Load16(pc + 1);
+ int end = Load16(pc + 3);
+ if (current_char >= start && current_char <= end) {
+ pc = code_base + Load32(pc + 5);
+ } else {
+ pc += 9;
+ }
+ break;
+ }
+ case BC_CHECK_NOT_RANGE: {
+ int start = Load16(pc + 1);
+ int end = Load16(pc + 3);
+ if (current_char < start || current_char > end || current_char == -1) {
+ pc = code_base + Load32(pc + 5);
+ } else {
+ pc += 9;
+ }
+ break;
+ }
+ case BC_CHECK_BACKREF:
+ case BC_CHECK_BACKREF_WIDE:
+ case BC_CHECK_NOT_BACKREF:
+ case BC_CHECK_NOT_BACKREF_WIDE:
+ case BC_CHECK_BITMAP:
+ case BC_CHECK_NOT_BITMAP:
+ default:
+ UNREACHABLE();
+ break;
+ }
+ }
+}
+
+
+bool Re2kInterpreter::Match(ByteArray* code_array, String* subject, int* captures, int start_position) {
+ const byte* code_base = code_array->GetDataStartAddress();
+ StringShape shape(subject);
+ ASSERT(subject->IsFlat(shape));
+ if (shape.IsAsciiRepresentation()) {
+ return raw_match(code_base, subject->ToAsciiVector(), captures, start_position);
+ } else {
+ return raw_match(code_base, subject->ToUC16Vector(), captures, start_position);
+ }
+}
+
+} } // namespace v8::internal
« no previous file with comments | « src/interpreter-re2k.h ('k') | src/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698