Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2008 the V8 project authors. All rights reserved. | |
| 2 // Redistribution and use in source and binary forms, with or without | |
| 3 // modification, are permitted provided that the following conditions are | |
| 4 // met: | |
| 5 // | |
| 6 // * Redistributions of source code must retain the above copyright | |
| 7 // notice, this list of conditions and the following disclaimer. | |
| 8 // * Redistributions in binary form must reproduce the above | |
| 9 // copyright notice, this list of conditions and the following | |
| 10 // disclaimer in the documentation and/or other materials provided | |
| 11 // with the distribution. | |
| 12 // * Neither the name of Google Inc. nor the names of its | |
| 13 // contributors may be used to endorse or promote products derived | |
| 14 // from this software without specific prior written permission. | |
| 15 // | |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 27 | |
| 28 // A simple interpreter for the Irregexp byte code. | |
| 29 | |
| 30 | |
| 31 #include "v8.h" | |
| 32 #include "utils.h" | |
| 33 #include "ast.h" | |
| 34 #include "bytecodes-irregexp.h" | |
| 35 #include "interpreter-irregexp.h" | |
| 36 | |
| 37 | |
| 38 namespace v8 { namespace internal { | |
| 39 | |
| 40 | |
| 41 #ifdef DEBUG | |
| 42 static void TraceInterpreter(const byte* code_base, | |
| 43 const byte* pc, | |
| 44 int stack_depth, | |
| 45 int current_position, | |
| 46 int bytecode_length, | |
| 47 const char* bytecode_name) { | |
| 48 if (FLAG_trace_regexp_bytecodes) { | |
| 49 PrintF("pc = %02x, sp = %d, current = %d, bc = %s", | |
| 50 pc - code_base, | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
Indentation one space off for all arguments except
Erik Corry
2008/11/26 12:18:36
Fixed.
| |
| 51 stack_depth, | |
| 52 current_position, | |
| 53 bytecode_name); | |
| 54 for (int i = 1; i < bytecode_length; i++) { | |
| 55 printf(", %02x", pc[i]); | |
| 56 } | |
| 57 printf("\n"); | |
| 58 } | |
| 59 } | |
| 60 | |
| 61 | |
| 62 # define BYTECODE(name) case BC_##name: \ | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
No space between # and define.
The indentation se
Erik Corry
2008/11/26 12:18:36
I like indenting #if in an analogous way to the in
| |
| 63 TraceInterpreter(code_base, \ | |
| 64 pc, \ | |
| 65 backtrack_sp - backtrack_stack, \ | |
| 66 current, \ | |
| 67 BC_##name##_LENGTH, \ | |
| 68 #name); | |
| 69 #else | |
| 70 # define BYTECODE(name) case BC_##name: // NOLINT | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
Remove space between # and define.
Erik Corry
2008/11/26 12:18:36
Fixed.
| |
| 71 #endif | |
| 72 | |
| 73 | |
| 74 | |
| 75 static bool RawMatch(const byte* code_base, | |
| 76 Vector<const uc16> subject, | |
| 77 int* registers, | |
| 78 int current) { | |
| 79 const byte* pc = code_base; | |
| 80 static const int kBacktrackStackSize = 10000; | |
| 81 int backtrack_stack[kBacktrackStackSize]; | |
| 82 int backtrack_stack_space = kBacktrackStackSize; | |
| 83 int* backtrack_sp = backtrack_stack; | |
| 84 int current_char = -1; | |
| 85 #ifdef DEBUG | |
| 86 if (FLAG_trace_regexp_bytecodes) { | |
| 87 PrintF("\n\nStart bytecode interpreter\n\n"); | |
| 88 } | |
| 89 #endif | |
| 90 while (true) { | |
| 91 switch (*pc) { | |
| 92 BYTECODE(BREAK) | |
| 93 UNREACHABLE(); | |
| 94 return false; | |
| 95 BYTECODE(PUSH_CP) | |
| 96 if (--backtrack_stack_space < 0) { | |
| 97 return false; // No match on backtrack stack overflow. | |
| 98 } | |
| 99 *backtrack_sp++ = current + Load32(pc + 1); | |
| 100 pc += BC_PUSH_CP_LENGTH; | |
| 101 break; | |
| 102 BYTECODE(PUSH_BT) | |
| 103 if (--backtrack_stack_space < 0) { | |
| 104 return false; // No match on backtrack stack overflow. | |
| 105 } | |
| 106 *backtrack_sp++ = Load32(pc + 1); | |
| 107 pc += BC_PUSH_BT_LENGTH; | |
| 108 break; | |
| 109 BYTECODE(PUSH_REGISTER) | |
| 110 if (--backtrack_stack_space < 0) { | |
| 111 return false; // No match on backtrack stack overflow. | |
| 112 } | |
| 113 *backtrack_sp++ = registers[pc[1]]; | |
| 114 pc += BC_PUSH_REGISTER_LENGTH; | |
| 115 break; | |
| 116 BYTECODE(SET_REGISTER) | |
| 117 registers[pc[1]] = Load32(pc + 2); | |
| 118 pc += BC_SET_REGISTER_LENGTH; | |
| 119 break; | |
| 120 BYTECODE(ADVANCE_REGISTER) | |
| 121 registers[pc[1]] += Load32(pc + 2); | |
| 122 pc += BC_ADVANCE_REGISTER_LENGTH; | |
| 123 break; | |
| 124 BYTECODE(SET_REGISTER_TO_CP) | |
| 125 registers[pc[1]] = current + Load32(pc + 2); | |
| 126 pc += BC_SET_REGISTER_TO_CP_LENGTH; | |
| 127 break; | |
| 128 BYTECODE(SET_CP_TO_REGISTER) | |
| 129 current = registers[pc[1]]; | |
| 130 pc += BC_SET_CP_TO_REGISTER_LENGTH; | |
| 131 break; | |
| 132 BYTECODE(SET_REGISTER_TO_SP) | |
| 133 registers[pc[1]] = backtrack_sp - backtrack_stack; | |
| 134 pc += BC_SET_REGISTER_TO_SP_LENGTH; | |
| 135 break; | |
| 136 BYTECODE(SET_SP_TO_REGISTER) | |
| 137 backtrack_sp = backtrack_stack + registers[pc[1]]; | |
| 138 backtrack_stack_space = kBacktrackStackSize - | |
| 139 (backtrack_sp - backtrack_stack); | |
| 140 pc += BC_SET_SP_TO_REGISTER_LENGTH; | |
| 141 break; | |
| 142 BYTECODE(POP_CP) | |
| 143 backtrack_stack_space++; | |
| 144 --backtrack_sp; | |
| 145 current = *backtrack_sp; | |
| 146 pc += BC_POP_CP_LENGTH; | |
| 147 break; | |
| 148 BYTECODE(POP_BT) | |
| 149 backtrack_stack_space++; | |
| 150 --backtrack_sp; | |
| 151 pc = code_base + *backtrack_sp; | |
| 152 break; | |
| 153 BYTECODE(POP_REGISTER) | |
| 154 backtrack_stack_space++; | |
| 155 --backtrack_sp; | |
| 156 registers[pc[1]] = *backtrack_sp; | |
| 157 pc += BC_POP_REGISTER_LENGTH; | |
| 158 break; | |
| 159 BYTECODE(FAIL) | |
| 160 return false; | |
| 161 BYTECODE(SUCCEED) | |
| 162 return true; | |
| 163 BYTECODE(ADVANCE_CP) | |
| 164 current += Load32(pc + 1); | |
| 165 pc += BC_ADVANCE_CP_LENGTH; | |
| 166 break; | |
| 167 BYTECODE(GOTO) | |
| 168 pc = code_base + Load32(pc + 1); | |
| 169 break; | |
| 170 BYTECODE(LOAD_CURRENT_CHAR) { | |
| 171 int pos = current + Load32(pc + 1); | |
| 172 if (pos >= subject.length()) { | |
| 173 pc = code_base + Load32(pc + 5); | |
| 174 } else { | |
| 175 current_char = subject[pos]; | |
| 176 pc += BC_LOAD_CURRENT_CHAR_LENGTH; | |
| 177 } | |
| 178 break; | |
| 179 } | |
| 180 BYTECODE(CHECK_CHAR) { | |
| 181 int c = Load16(pc + 1); | |
| 182 if (c == current_char) { | |
| 183 pc = code_base + Load32(pc + 3); | |
| 184 } else { | |
| 185 pc += BC_CHECK_CHAR_LENGTH; | |
| 186 } | |
| 187 break; | |
| 188 } | |
| 189 BYTECODE(CHECK_NOT_CHAR) { | |
| 190 int c = Load16(pc + 1); | |
| 191 if (c != current_char) { | |
| 192 pc = code_base + Load32(pc + 3); | |
| 193 } else { | |
| 194 pc += BC_CHECK_NOT_CHAR_LENGTH; | |
| 195 } | |
| 196 break; | |
| 197 } | |
| 198 BYTECODE(OR_CHECK_NOT_CHAR) { | |
| 199 int c = Load16(pc + 1); | |
| 200 if (c != (current_char | Load16(pc + 3))) { | |
| 201 pc = code_base + Load32(pc + 5); | |
| 202 } else { | |
| 203 pc += BC_OR_CHECK_NOT_CHAR_LENGTH; | |
| 204 } | |
| 205 break; | |
| 206 } | |
| 207 BYTECODE(MINUS_OR_CHECK_NOT_CHAR) { | |
| 208 int c = Load16(pc + 1); | |
| 209 int m = Load16(pc + 3); | |
| 210 if (c != ((current_char - m) | m)) { | |
| 211 pc = code_base + Load32(pc + 5); | |
| 212 } else { | |
| 213 pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH; | |
| 214 } | |
| 215 break; | |
| 216 } | |
| 217 BYTECODE(CHECK_LT) { | |
| 218 int limit = Load16(pc + 1); | |
| 219 if (current_char < limit) { | |
| 220 pc = code_base + Load32(pc + 3); | |
| 221 } else { | |
| 222 pc += BC_CHECK_LT_LENGTH; | |
| 223 } | |
| 224 break; | |
| 225 } | |
| 226 BYTECODE(CHECK_GT) { | |
| 227 int limit = Load16(pc + 1); | |
| 228 if (current_char > limit) { | |
| 229 pc = code_base + Load32(pc + 3); | |
| 230 } else { | |
| 231 pc += BC_CHECK_GT_LENGTH; | |
| 232 } | |
| 233 break; | |
| 234 } | |
| 235 BYTECODE(CHECK_REGISTER_LT) | |
| 236 if (registers[pc[1]] < Load16(pc + 2)) { | |
| 237 pc = code_base + Load32(pc + 4); | |
| 238 } else { | |
| 239 pc += BC_CHECK_REGISTER_LT_LENGTH; | |
| 240 } | |
| 241 break; | |
| 242 BYTECODE(CHECK_REGISTER_GE) | |
| 243 if (registers[pc[1]] >= Load16(pc + 2)) { | |
| 244 pc = code_base + Load32(pc + 4); | |
| 245 } else { | |
| 246 pc += BC_CHECK_REGISTER_GE_LENGTH; | |
| 247 } | |
| 248 break; | |
| 249 BYTECODE(LOOKUP_MAP1) { | |
| 250 // Look up character in a bitmap. If we find a 0, then jump to the | |
| 251 // location at pc + 7. Otherwise fall through! | |
| 252 int index = current_char - Load16(pc + 1); | |
| 253 byte map = code_base[Load32(pc + 3) + (index >> 3)]; | |
| 254 map = ((map >> (index & 7)) & 1); | |
| 255 if (map == 0) { | |
| 256 pc = code_base + Load32(pc + 7); | |
| 257 } else { | |
| 258 pc += BC_LOOKUP_MAP1_LENGTH; | |
| 259 } | |
| 260 break; | |
| 261 } | |
| 262 BYTECODE(LOOKUP_MAP2) { | |
| 263 // Look up character in a half-nibble map. If we find 00, then jump to | |
| 264 // the location at pc + 7. If we find 01 then jump to location at | |
| 265 // pc + 11, etc. | |
| 266 int index = (current_char - Load16(pc + 1)) << 1; | |
| 267 byte map = code_base[Load32(pc + 3) + (index >> 3)]; | |
| 268 map = ((map >> (index & 7)) & 3); | |
| 269 if (map < 2) { | |
| 270 if (map == 0) { | |
| 271 pc = code_base + Load32(pc + 7); | |
| 272 } else { | |
| 273 pc = code_base + Load32(pc + 11); | |
| 274 } | |
| 275 } else { | |
| 276 if (map == 2) { | |
| 277 pc = code_base + Load32(pc + 15); | |
| 278 } else { | |
| 279 pc = code_base + Load32(pc + 19); | |
| 280 } | |
| 281 } | |
| 282 break; | |
| 283 } | |
| 284 BYTECODE(LOOKUP_MAP8) { | |
| 285 // Look up character in a byte map. Use the byte as an index into a | |
| 286 // table that follows this instruction immediately. | |
| 287 int index = current_char - Load16(pc + 1); | |
| 288 byte map = code_base[Load32(pc + 3) + index]; | |
| 289 const byte* new_pc = code_base + Load32(pc + 7) + (map << 2); | |
| 290 pc = code_base + Load32(new_pc); | |
| 291 break; | |
| 292 } | |
| 293 BYTECODE(LOOKUP_HI_MAP8) { | |
| 294 // Look up high byte of this character in a byte map. Use the byte as | |
| 295 // an index into a table that follows this instruction immediately. | |
| 296 int index = (current_char >> 8) - pc[1]; | |
| 297 byte map = code_base[Load32(pc + 2) + index]; | |
| 298 const byte* new_pc = code_base + Load32(pc + 6) + (map << 2); | |
| 299 pc = code_base + Load32(new_pc); | |
| 300 break; | |
| 301 } | |
| 302 BYTECODE(CHECK_NOT_BACK_REF) { | |
| 303 int from = registers[pc[1]]; | |
| 304 int len = registers[pc[1] + 1] - from; | |
| 305 if (current + len > subject.length()) { | |
| 306 pc = code_base + Load32(pc + 2); | |
| 307 break; | |
| 308 } else { | |
| 309 int i; | |
| 310 for (i = 0; i < len; i++) { | |
| 311 if (subject[from + i] != subject[current + i]) { | |
| 312 pc = code_base + Load32(pc + 2); | |
| 313 break; | |
| 314 } | |
| 315 } | |
| 316 if (i < len) break; | |
| 317 current += len; | |
| 318 } | |
| 319 pc += BC_CHECK_NOT_BACK_REF_LENGTH; | |
| 320 break; | |
| 321 } | |
| 322 default: | |
| 323 UNREACHABLE(); | |
| 324 break; | |
| 325 } | |
| 326 } | |
| 327 } | |
| 328 | |
| 329 | |
| 330 bool IrregexpInterpreter::Match(Handle<ByteArray> code_array, | |
| 331 Handle<String> subject16, | |
| 332 int* registers, | |
| 333 int start_position) { | |
| 334 ASSERT(StringShape(*subject16).IsTwoByteRepresentation()); | |
| 335 ASSERT(subject16->IsFlat(StringShape(*subject16))); | |
| 336 | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
Remove one of the empty lines?
Erik Corry
2008/11/26 12:18:36
Fixed.
| |
| 337 | |
| 338 AssertNoAllocation a; | |
| 339 const byte* code_base = code_array->GetDataStartAddress(); | |
| 340 return RawMatch(code_base, | |
| 341 Vector<const uc16>(subject16->GetTwoByteData(), | |
| 342 subject16->length()), | |
| 343 registers, | |
| 344 start_position); | |
| 345 } | |
| 346 | |
| 347 } } // namespace v8::internal | |
| OLD | NEW |