Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/interpreter-re2k.cc

Issue 10830: * We want to be able to find atoms and character classes without advancing th... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/regexp2000/
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2008 the V8 project authors. All rights reserved. 1 // Copyright 2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 PrintF("pc = %d, current = %d, bc = " \ 45 PrintF("pc = %d, current = %d, bc = " \
46 #name "\n", pc - code_base, current); \ 46 #name "\n", pc - code_base, current); \
47 } 47 }
48 #else 48 #else
49 # define BYTECODE(name) break; \ 49 # define BYTECODE(name) break; \
50 case BC_##name: 50 case BC_##name:
51 #endif 51 #endif
52 52
53 53
54 54
55 template <typename Char>
56 static bool RawMatch(const byte* code_base, 55 static bool RawMatch(const byte* code_base,
57 Vector<const Char> subject, 56 Vector<const uc16> subject,
58 int* registers, 57 int* registers,
59 int current) { 58 int current) {
60 const byte* pc = code_base; 59 const byte* pc = code_base;
61 int backtrack_stack[1000]; 60 int backtrack_stack[1000];
62 int backtrack_stack_space = 1000; 61 int backtrack_stack_space = 1000;
63 int* backtrack_sp = backtrack_stack; 62 int* backtrack_sp = backtrack_stack;
64 int current_char = -1; 63 int current_char = -1;
65 #ifdef DEBUG 64 #ifdef DEBUG
66 if (FLAG_trace_regexp_bytecodes) { 65 if (FLAG_trace_regexp_bytecodes) {
67 PrintF("\n\nStart bytecode interpreter\n\n"); 66 PrintF("\n\nStart bytecode interpreter\n\n");
(...skipping 18 matching lines...) Expand all
86 pc += 5; 85 pc += 5;
87 BYTECODE(PUSH_REGISTER) 86 BYTECODE(PUSH_REGISTER)
88 if (--backtrack_stack_space < 0) { 87 if (--backtrack_stack_space < 0) {
89 return false; // No match on backtrack stack overflow. 88 return false; // No match on backtrack stack overflow.
90 } 89 }
91 *backtrack_sp++ = registers[pc[1]]; 90 *backtrack_sp++ = registers[pc[1]];
92 pc += 2; 91 pc += 2;
93 BYTECODE(SET_REGISTER) 92 BYTECODE(SET_REGISTER)
94 registers[pc[1]] = Load32(pc + 2); 93 registers[pc[1]] = Load32(pc + 2);
95 pc += 6; 94 pc += 6;
95 BYTECODE(ADVANCE_REGISTER)
96 registers[pc[1]] += Load32(pc + 2);
97 pc += 6;
96 BYTECODE(SET_REGISTER_TO_CP) 98 BYTECODE(SET_REGISTER_TO_CP)
97 registers[pc[1]] = current + Load32(pc + 2); 99 registers[pc[1]] = current + Load32(pc + 2);
98 pc += 6; 100 pc += 6;
99 BYTECODE(POP_CP) 101 BYTECODE(POP_CP)
100 backtrack_stack_space++; 102 backtrack_stack_space++;
101 --backtrack_sp; 103 --backtrack_sp;
102 current = *backtrack_sp; 104 current = *backtrack_sp;
103 pc += 1; 105 pc += 1;
104 BYTECODE(POP_BT) 106 BYTECODE(POP_BT)
105 backtrack_stack_space++; 107 backtrack_stack_space++;
106 --backtrack_sp; 108 --backtrack_sp;
107 pc = code_base + *backtrack_sp; 109 pc = code_base + *backtrack_sp;
108 BYTECODE(POP_REGISTER) 110 BYTECODE(POP_REGISTER)
109 backtrack_stack_space++; 111 backtrack_stack_space++;
110 --backtrack_sp; 112 --backtrack_sp;
111 registers[pc[1]] = *backtrack_sp; 113 registers[pc[1]] = *backtrack_sp;
112 pc += 2; 114 pc += 2;
113 BYTECODE(FAIL) 115 BYTECODE(FAIL)
114 return false; 116 return false;
115 BYTECODE(FAIL_IF_WITHIN)
116 if (current + Load32(pc + 1) >= subject.length()) {
117 return false;
118 }
119 pc += 5;
120 BYTECODE(SUCCEED) 117 BYTECODE(SUCCEED)
121 return true; 118 return true;
122 BYTECODE(ADVANCE_CP) 119 BYTECODE(ADVANCE_CP)
123 current += Load32(pc + 1); 120 current += Load32(pc + 1);
124 pc += 5; 121 pc += 5;
125 BYTECODE(GOTO) 122 BYTECODE(GOTO)
126 pc = code_base + Load32(pc + 1); 123 pc = code_base + Load32(pc + 1);
127 BYTECODE(LOAD_CURRENT_CHAR) { 124 BYTECODE(LOAD_CURRENT_CHAR) {
128 int pos = current + Load32(pc + 1); 125 int pos = current + Load32(pc + 1);
129 if (pos >= subject.length()) { 126 if (pos >= subject.length()) {
130 current_char = -1; 127 pc = code_base + Load32(pc + 5);
131 } else { 128 } else {
132 current_char = subject[pos]; 129 current_char = subject[pos];
130 pc += 9;
133 } 131 }
134 pc += 5;
135 } 132 }
136 BYTECODE(CHECK_CHAR) { 133 BYTECODE(CHECK_CHAR) {
137 int c = Load16(pc + 1); 134 int c = Load16(pc + 1);
138 if (c != current_char) { 135 if (c != current_char) {
139 pc = code_base + Load32(pc + 3); 136 pc = code_base + Load32(pc + 3);
140 } else { 137 } else {
141 pc += 7; 138 pc += 7;
142 } 139 }
143 } 140 }
144 BYTECODE(CHECK_NOT_CHAR) { 141 BYTECODE(CHECK_NOT_CHAR) {
145 int c = Load16(pc + 1); 142 int c = Load16(pc + 1);
146 if (c == current_char) { 143 if (c == current_char) {
147 pc = code_base + Load32(pc + 3); 144 pc = code_base + Load32(pc + 3);
148 } else { 145 } else {
149 pc += 7; 146 pc += 7;
150 } 147 }
151 } 148 }
152 BYTECODE(CHECK_END) {
153 if (current_char != -1) {
154 pc = code_base + Load32(pc + 1);
155 } else {
156 pc += 5;
157 }
158 }
159 BYTECODE(CHECK_NOT_END) {
160 if (current_char == -1) {
161 pc = code_base + Load32(pc + 1);
162 } else {
163 pc += 5;
164 }
165 }
166 BYTECODE(CHECK_RANGE) { 149 BYTECODE(CHECK_RANGE) {
167 int start = Load16(pc + 1); 150 int start = Load16(pc + 1);
168 int end = Load16(pc + 3); 151 int end = Load16(pc + 3);
169 if (current_char >= start && current_char <= end) { 152 if (current_char < start || current_char > end) {
170 pc = code_base + Load32(pc + 5); 153 pc = code_base + Load32(pc + 5);
171 } else { 154 } else {
172 pc += 9; 155 pc += 9;
173 } 156 }
174 } 157 }
175 BYTECODE(CHECK_NOT_RANGE) { 158 BYTECODE(CHECK_NOT_RANGE) {
176 int start = Load16(pc + 1); 159 int start = Load16(pc + 1);
177 int end = Load16(pc + 3); 160 int end = Load16(pc + 3);
178 if (current_char < start || current_char > end || current_char == -1) { 161 if (current_char >= start && current_char <= end) {
179 pc = code_base + Load32(pc + 5); 162 pc = code_base + Load32(pc + 5);
180 } else { 163 } else {
181 pc += 9; 164 pc += 9;
182 } 165 }
183 } 166 }
184 BYTECODE(CHECK_REGISTER_LT) 167 BYTECODE(CHECK_REGISTER_LT)
185 if (registers[pc[1]] < Load16(pc + 2)) { 168 if (registers[pc[1]] < Load16(pc + 2)) {
186 pc = code_base + Load32(pc + 4); 169 pc = code_base + Load32(pc + 4);
187 } else { 170 } else {
188 pc += 8; 171 pc += 8;
189 } 172 }
190 BYTECODE(CHECK_REGISTER_GE) 173 BYTECODE(CHECK_REGISTER_GE)
191 if (registers[pc[1]] >= Load16(pc + 2)) { 174 if (registers[pc[1]] >= Load16(pc + 2)) {
192 pc = code_base + Load32(pc + 4); 175 pc = code_base + Load32(pc + 4);
193 } else { 176 } else {
194 pc += 8; 177 pc += 8;
195 } 178 }
179 BYTECODE(LOOKUP_MAP1) {
180 // Look up character in a bitmap. If we find a 0, then jump to the
181 // location at pc + 7. Otherwise fall through!
182 int index = current_char - Load16(pc + 1);
183 byte map = code_base[Load32(pc + 3) + (index >> 3)];
184 map = ((map >> (index & 7)) & 1);
185 if (map == 0) {
186 pc = code_base + Load32(pc + 7);
187 } else {
188 pc += 11;
189 }
190 }
191 BYTECODE(LOOKUP_MAP2) {
192 // Look up character in a half-nibble map. If we find 00, then jump to
193 // the location at pc + 7. If we find 01 then jump to location at
194 // pc + 11, etc.
195 int index = (current_char - Load16(pc + 1)) << 1;
196 byte map = code_base[Load32(pc + 3) + (index >> 3)];
197 map = ((map >> (index & 7)) & 3);
198 if (map < 2) {
199 if (map == 0) {
200 pc = code_base + Load32(pc + 7);
201 } else {
202 pc = code_base + Load32(pc + 11);
203 }
204 } else {
205 if (map == 2) {
206 pc = code_base + Load32(pc + 15);
207 } else {
208 pc = code_base + Load32(pc + 19);
209 }
210 }
211 }
212 BYTECODE(LOOKUP_MAP8) {
213 // Look up character in a byte map. Use the byte as an index into a
214 // table that follows this instruction immediately.
215 int index = current_char - Load16(pc + 1);
216 byte map = code_base[Load32(pc + 3) + index];
217 const byte* new_pc = code_base + Load32(pc + 7) + (map << 2);
218 pc = code_base + Load32(new_pc);
219 }
220 BYTECODE(LOOKUP_HI_MAP8) {
221 // Look up high byte of this character in a byte map. Use the byte as
222 // an index into a table that follows this instruction immediately.
223 int index = (current_char >> 8) - pc[1];
224 byte map = code_base[Load32(pc + 2) + index];
225 const byte* new_pc = code_base + Load32(pc + 6) + (map << 2);
226 pc = code_base + Load32(new_pc);
227 }
196 BYTECODE(CHECK_BACKREF) 228 BYTECODE(CHECK_BACKREF)
197 UNREACHABLE(); 229 UNREACHABLE();
198 BYTECODE(CHECK_NOT_BACKREF) 230 BYTECODE(CHECK_NOT_BACKREF)
199 UNREACHABLE(); 231 UNREACHABLE();
200 BYTECODE(CHECK_BITMAP)
201 UNREACHABLE();
202 BYTECODE(CHECK_NOT_BITMAP)
203 UNREACHABLE();
204 break; // Last one doesn't have break in macro. 232 break; // Last one doesn't have break in macro.
205 default: 233 default:
206 UNREACHABLE(); 234 UNREACHABLE();
207 break; 235 break;
208 } 236 }
209 } 237 }
210 } 238 }
211 239
212 240
213 bool Re2kInterpreter::Match(ByteArray* code_array, 241 bool Re2kInterpreter::Match(Handle<ByteArray> code_array,
214 String* subject, 242 Handle<String> subject,
215 int* registers, 243 int* registers,
216 int start_position) { 244 int start_position) {
217 const byte* code_base = code_array->GetDataStartAddress(); 245 const byte* code_base = code_array->GetDataStartAddress();
218 StringShape shape(subject); 246 ASSERT(subject->IsFlat(StringShape(*subject)));
219 ASSERT(subject->IsFlat(shape)); 247 Handle<String> flat_two_byte = RegExpImpl::CachedStringToTwoByte(subject);
220 if (shape.IsAsciiRepresentation()) { 248 ASSERT(StringShape(*flat_two_byte).IsTwoByteRepresentation());
221 return RawMatch(code_base, 249 return RawMatch(code_base,
222 subject->ToAsciiVector(), 250 flat_two_byte->ToUC16Vector(),
223 registers, 251 registers,
224 start_position); 252 start_position);
225 } else {
226 return RawMatch(code_base,
227 subject->ToUC16Vector(),
228 registers,
229 start_position);
230 }
231 } 253 }
232 254
233 } } // namespace v8::internal 255 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/interpreter-re2k.h ('k') | src/jsregexp.h » ('j') | src/regexp-macro-assembler.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698