OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2008 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | |
28 // A simple interpreter for the Irregexp byte code. | |
29 | |
30 | |
31 #include "v8.h" | |
32 #include "utils.h" | |
33 #include "ast.h" | |
34 #include "bytecodes-irregexp.h" | |
35 #include "interpreter-irregexp.h" | |
36 | |
37 | |
38 namespace v8 { namespace internal { | |
39 | |
40 | |
41 #ifdef DEBUG | |
42 static void TraceInterpreter(const byte* code_base, | |
43 const byte* pc, | |
44 int stack_depth, | |
45 int current_position, | |
46 int bytecode_length, | |
47 const char* bytecode_name) { | |
48 if (FLAG_trace_regexp_bytecodes) { | |
49 PrintF("pc = %02x, sp = %d, current = %d, bc = %s", | |
50 pc - code_base, | |
Mads Ager (chromium)
2008/11/25 21:09:41
Indentation one space off for all arguments except
Erik Corry
2008/11/26 12:18:36
Fixed.
| |
51 stack_depth, | |
52 current_position, | |
53 bytecode_name); | |
54 for (int i = 1; i < bytecode_length; i++) { | |
55 printf(", %02x", pc[i]); | |
56 } | |
57 printf("\n"); | |
58 } | |
59 } | |
60 | |
61 | |
62 # define BYTECODE(name) case BC_##name: \ | |
Mads Ager (chromium)
2008/11/25 21:09:41
No space between # and define.
The indentation se
Erik Corry
2008/11/26 12:18:36
I like indenting #if in an analogous way to the in
| |
63 TraceInterpreter(code_base, \ | |
64 pc, \ | |
65 backtrack_sp - backtrack_stack, \ | |
66 current, \ | |
67 BC_##name##_LENGTH, \ | |
68 #name); | |
69 #else | |
70 # define BYTECODE(name) case BC_##name: // NOLINT | |
Mads Ager (chromium)
2008/11/25 21:09:41
Remove space between # and define.
Erik Corry
2008/11/26 12:18:36
Fixed.
| |
71 #endif | |
72 | |
73 | |
74 | |
75 static bool RawMatch(const byte* code_base, | |
76 Vector<const uc16> subject, | |
77 int* registers, | |
78 int current) { | |
79 const byte* pc = code_base; | |
80 static const int kBacktrackStackSize = 10000; | |
81 int backtrack_stack[kBacktrackStackSize]; | |
82 int backtrack_stack_space = kBacktrackStackSize; | |
83 int* backtrack_sp = backtrack_stack; | |
84 int current_char = -1; | |
85 #ifdef DEBUG | |
86 if (FLAG_trace_regexp_bytecodes) { | |
87 PrintF("\n\nStart bytecode interpreter\n\n"); | |
88 } | |
89 #endif | |
90 while (true) { | |
91 switch (*pc) { | |
92 BYTECODE(BREAK) | |
93 UNREACHABLE(); | |
94 return false; | |
95 BYTECODE(PUSH_CP) | |
96 if (--backtrack_stack_space < 0) { | |
97 return false; // No match on backtrack stack overflow. | |
98 } | |
99 *backtrack_sp++ = current + Load32(pc + 1); | |
100 pc += BC_PUSH_CP_LENGTH; | |
101 break; | |
102 BYTECODE(PUSH_BT) | |
103 if (--backtrack_stack_space < 0) { | |
104 return false; // No match on backtrack stack overflow. | |
105 } | |
106 *backtrack_sp++ = Load32(pc + 1); | |
107 pc += BC_PUSH_BT_LENGTH; | |
108 break; | |
109 BYTECODE(PUSH_REGISTER) | |
110 if (--backtrack_stack_space < 0) { | |
111 return false; // No match on backtrack stack overflow. | |
112 } | |
113 *backtrack_sp++ = registers[pc[1]]; | |
114 pc += BC_PUSH_REGISTER_LENGTH; | |
115 break; | |
116 BYTECODE(SET_REGISTER) | |
117 registers[pc[1]] = Load32(pc + 2); | |
118 pc += BC_SET_REGISTER_LENGTH; | |
119 break; | |
120 BYTECODE(ADVANCE_REGISTER) | |
121 registers[pc[1]] += Load32(pc + 2); | |
122 pc += BC_ADVANCE_REGISTER_LENGTH; | |
123 break; | |
124 BYTECODE(SET_REGISTER_TO_CP) | |
125 registers[pc[1]] = current + Load32(pc + 2); | |
126 pc += BC_SET_REGISTER_TO_CP_LENGTH; | |
127 break; | |
128 BYTECODE(SET_CP_TO_REGISTER) | |
129 current = registers[pc[1]]; | |
130 pc += BC_SET_CP_TO_REGISTER_LENGTH; | |
131 break; | |
132 BYTECODE(SET_REGISTER_TO_SP) | |
133 registers[pc[1]] = backtrack_sp - backtrack_stack; | |
134 pc += BC_SET_REGISTER_TO_SP_LENGTH; | |
135 break; | |
136 BYTECODE(SET_SP_TO_REGISTER) | |
137 backtrack_sp = backtrack_stack + registers[pc[1]]; | |
138 backtrack_stack_space = kBacktrackStackSize - | |
139 (backtrack_sp - backtrack_stack); | |
140 pc += BC_SET_SP_TO_REGISTER_LENGTH; | |
141 break; | |
142 BYTECODE(POP_CP) | |
143 backtrack_stack_space++; | |
144 --backtrack_sp; | |
145 current = *backtrack_sp; | |
146 pc += BC_POP_CP_LENGTH; | |
147 break; | |
148 BYTECODE(POP_BT) | |
149 backtrack_stack_space++; | |
150 --backtrack_sp; | |
151 pc = code_base + *backtrack_sp; | |
152 break; | |
153 BYTECODE(POP_REGISTER) | |
154 backtrack_stack_space++; | |
155 --backtrack_sp; | |
156 registers[pc[1]] = *backtrack_sp; | |
157 pc += BC_POP_REGISTER_LENGTH; | |
158 break; | |
159 BYTECODE(FAIL) | |
160 return false; | |
161 BYTECODE(SUCCEED) | |
162 return true; | |
163 BYTECODE(ADVANCE_CP) | |
164 current += Load32(pc + 1); | |
165 pc += BC_ADVANCE_CP_LENGTH; | |
166 break; | |
167 BYTECODE(GOTO) | |
168 pc = code_base + Load32(pc + 1); | |
169 break; | |
170 BYTECODE(LOAD_CURRENT_CHAR) { | |
171 int pos = current + Load32(pc + 1); | |
172 if (pos >= subject.length()) { | |
173 pc = code_base + Load32(pc + 5); | |
174 } else { | |
175 current_char = subject[pos]; | |
176 pc += BC_LOAD_CURRENT_CHAR_LENGTH; | |
177 } | |
178 break; | |
179 } | |
180 BYTECODE(CHECK_CHAR) { | |
181 int c = Load16(pc + 1); | |
182 if (c == current_char) { | |
183 pc = code_base + Load32(pc + 3); | |
184 } else { | |
185 pc += BC_CHECK_CHAR_LENGTH; | |
186 } | |
187 break; | |
188 } | |
189 BYTECODE(CHECK_NOT_CHAR) { | |
190 int c = Load16(pc + 1); | |
191 if (c != current_char) { | |
192 pc = code_base + Load32(pc + 3); | |
193 } else { | |
194 pc += BC_CHECK_NOT_CHAR_LENGTH; | |
195 } | |
196 break; | |
197 } | |
198 BYTECODE(OR_CHECK_NOT_CHAR) { | |
199 int c = Load16(pc + 1); | |
200 if (c != (current_char | Load16(pc + 3))) { | |
201 pc = code_base + Load32(pc + 5); | |
202 } else { | |
203 pc += BC_OR_CHECK_NOT_CHAR_LENGTH; | |
204 } | |
205 break; | |
206 } | |
207 BYTECODE(MINUS_OR_CHECK_NOT_CHAR) { | |
208 int c = Load16(pc + 1); | |
209 int m = Load16(pc + 3); | |
210 if (c != ((current_char - m) | m)) { | |
211 pc = code_base + Load32(pc + 5); | |
212 } else { | |
213 pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH; | |
214 } | |
215 break; | |
216 } | |
217 BYTECODE(CHECK_LT) { | |
218 int limit = Load16(pc + 1); | |
219 if (current_char < limit) { | |
220 pc = code_base + Load32(pc + 3); | |
221 } else { | |
222 pc += BC_CHECK_LT_LENGTH; | |
223 } | |
224 break; | |
225 } | |
226 BYTECODE(CHECK_GT) { | |
227 int limit = Load16(pc + 1); | |
228 if (current_char > limit) { | |
229 pc = code_base + Load32(pc + 3); | |
230 } else { | |
231 pc += BC_CHECK_GT_LENGTH; | |
232 } | |
233 break; | |
234 } | |
235 BYTECODE(CHECK_REGISTER_LT) | |
236 if (registers[pc[1]] < Load16(pc + 2)) { | |
237 pc = code_base + Load32(pc + 4); | |
238 } else { | |
239 pc += BC_CHECK_REGISTER_LT_LENGTH; | |
240 } | |
241 break; | |
242 BYTECODE(CHECK_REGISTER_GE) | |
243 if (registers[pc[1]] >= Load16(pc + 2)) { | |
244 pc = code_base + Load32(pc + 4); | |
245 } else { | |
246 pc += BC_CHECK_REGISTER_GE_LENGTH; | |
247 } | |
248 break; | |
249 BYTECODE(LOOKUP_MAP1) { | |
250 // Look up character in a bitmap. If we find a 0, then jump to the | |
251 // location at pc + 7. Otherwise fall through! | |
252 int index = current_char - Load16(pc + 1); | |
253 byte map = code_base[Load32(pc + 3) + (index >> 3)]; | |
254 map = ((map >> (index & 7)) & 1); | |
255 if (map == 0) { | |
256 pc = code_base + Load32(pc + 7); | |
257 } else { | |
258 pc += BC_LOOKUP_MAP1_LENGTH; | |
259 } | |
260 break; | |
261 } | |
262 BYTECODE(LOOKUP_MAP2) { | |
263 // Look up character in a half-nibble map. If we find 00, then jump to | |
264 // the location at pc + 7. If we find 01 then jump to location at | |
265 // pc + 11, etc. | |
266 int index = (current_char - Load16(pc + 1)) << 1; | |
267 byte map = code_base[Load32(pc + 3) + (index >> 3)]; | |
268 map = ((map >> (index & 7)) & 3); | |
269 if (map < 2) { | |
270 if (map == 0) { | |
271 pc = code_base + Load32(pc + 7); | |
272 } else { | |
273 pc = code_base + Load32(pc + 11); | |
274 } | |
275 } else { | |
276 if (map == 2) { | |
277 pc = code_base + Load32(pc + 15); | |
278 } else { | |
279 pc = code_base + Load32(pc + 19); | |
280 } | |
281 } | |
282 break; | |
283 } | |
284 BYTECODE(LOOKUP_MAP8) { | |
285 // Look up character in a byte map. Use the byte as an index into a | |
286 // table that follows this instruction immediately. | |
287 int index = current_char - Load16(pc + 1); | |
288 byte map = code_base[Load32(pc + 3) + index]; | |
289 const byte* new_pc = code_base + Load32(pc + 7) + (map << 2); | |
290 pc = code_base + Load32(new_pc); | |
291 break; | |
292 } | |
293 BYTECODE(LOOKUP_HI_MAP8) { | |
294 // Look up high byte of this character in a byte map. Use the byte as | |
295 // an index into a table that follows this instruction immediately. | |
296 int index = (current_char >> 8) - pc[1]; | |
297 byte map = code_base[Load32(pc + 2) + index]; | |
298 const byte* new_pc = code_base + Load32(pc + 6) + (map << 2); | |
299 pc = code_base + Load32(new_pc); | |
300 break; | |
301 } | |
302 BYTECODE(CHECK_NOT_BACK_REF) { | |
303 int from = registers[pc[1]]; | |
304 int len = registers[pc[1] + 1] - from; | |
305 if (current + len > subject.length()) { | |
306 pc = code_base + Load32(pc + 2); | |
307 break; | |
308 } else { | |
309 int i; | |
310 for (i = 0; i < len; i++) { | |
311 if (subject[from + i] != subject[current + i]) { | |
312 pc = code_base + Load32(pc + 2); | |
313 break; | |
314 } | |
315 } | |
316 if (i < len) break; | |
317 current += len; | |
318 } | |
319 pc += BC_CHECK_NOT_BACK_REF_LENGTH; | |
320 break; | |
321 } | |
322 default: | |
323 UNREACHABLE(); | |
324 break; | |
325 } | |
326 } | |
327 } | |
328 | |
329 | |
330 bool IrregexpInterpreter::Match(Handle<ByteArray> code_array, | |
331 Handle<String> subject16, | |
332 int* registers, | |
333 int start_position) { | |
334 ASSERT(StringShape(*subject16).IsTwoByteRepresentation()); | |
335 ASSERT(subject16->IsFlat(StringShape(*subject16))); | |
336 | |
Mads Ager (chromium)
2008/11/25 21:09:41
Remove one of the empty lines?
Erik Corry
2008/11/26 12:18:36
Fixed.
| |
337 | |
338 AssertNoAllocation a; | |
339 const byte* code_base = code_array->GetDataStartAddress(); | |
340 return RawMatch(code_base, | |
341 Vector<const uc16>(subject16->GetTwoByteData(), | |
342 subject16->length()), | |
343 registers, | |
344 start_position); | |
345 } | |
346 | |
347 } } // namespace v8::internal | |
OLD | NEW |