| OLD | NEW |
| (Empty) |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // A simple interpreter for the Irregexp byte code. | |
| 6 | |
| 7 | |
| 8 #include "src/v8.h" | |
| 9 | |
| 10 #include "src/ast.h" | |
| 11 #include "src/bytecodes-irregexp.h" | |
| 12 #include "src/interpreter-irregexp.h" | |
| 13 #include "src/jsregexp.h" | |
| 14 #include "src/regexp-macro-assembler.h" | |
| 15 #include "src/unicode.h" | |
| 16 #include "src/utils.h" | |
| 17 | |
| 18 namespace v8 { | |
| 19 namespace internal { | |
| 20 | |
| 21 | |
| 22 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; | |
| 23 | |
| 24 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | |
| 25 int from, | |
| 26 int current, | |
| 27 int len, | |
| 28 Vector<const uc16> subject) { | |
| 29 for (int i = 0; i < len; i++) { | |
| 30 unibrow::uchar old_char = subject[from++]; | |
| 31 unibrow::uchar new_char = subject[current++]; | |
| 32 if (old_char == new_char) continue; | |
| 33 unibrow::uchar old_string[1] = { old_char }; | |
| 34 unibrow::uchar new_string[1] = { new_char }; | |
| 35 interp_canonicalize->get(old_char, '\0', old_string); | |
| 36 interp_canonicalize->get(new_char, '\0', new_string); | |
| 37 if (old_string[0] != new_string[0]) { | |
| 38 return false; | |
| 39 } | |
| 40 } | |
| 41 return true; | |
| 42 } | |
| 43 | |
| 44 | |
| 45 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | |
| 46 int from, | |
| 47 int current, | |
| 48 int len, | |
| 49 Vector<const uint8_t> subject) { | |
| 50 for (int i = 0; i < len; i++) { | |
| 51 unsigned int old_char = subject[from++]; | |
| 52 unsigned int new_char = subject[current++]; | |
| 53 if (old_char == new_char) continue; | |
| 54 // Convert both characters to lower case. | |
| 55 old_char |= 0x20; | |
| 56 new_char |= 0x20; | |
| 57 if (old_char != new_char) return false; | |
| 58 // Not letters in the ASCII range and Latin-1 range. | |
| 59 if (!(old_char - 'a' <= 'z' - 'a') && | |
| 60 !(old_char - 224 <= 254 - 224 && old_char != 247)) { | |
| 61 return false; | |
| 62 } | |
| 63 } | |
| 64 return true; | |
| 65 } | |
| 66 | |
| 67 | |
| 68 #ifdef DEBUG | |
| 69 static void TraceInterpreter(const byte* code_base, | |
| 70 const byte* pc, | |
| 71 int stack_depth, | |
| 72 int current_position, | |
| 73 uint32_t current_char, | |
| 74 int bytecode_length, | |
| 75 const char* bytecode_name) { | |
| 76 if (FLAG_trace_regexp_bytecodes) { | |
| 77 bool printable = (current_char < 127 && current_char >= 32); | |
| 78 const char* format = | |
| 79 printable ? | |
| 80 "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" : | |
| 81 "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s"; | |
| 82 PrintF(format, | |
| 83 pc - code_base, | |
| 84 stack_depth, | |
| 85 current_position, | |
| 86 current_char, | |
| 87 printable ? current_char : '.', | |
| 88 bytecode_name); | |
| 89 for (int i = 0; i < bytecode_length; i++) { | |
| 90 printf(", %02x", pc[i]); | |
| 91 } | |
| 92 printf(" "); | |
| 93 for (int i = 1; i < bytecode_length; i++) { | |
| 94 unsigned char b = pc[i]; | |
| 95 if (b < 127 && b >= 32) { | |
| 96 printf("%c", b); | |
| 97 } else { | |
| 98 printf("."); | |
| 99 } | |
| 100 } | |
| 101 printf("\n"); | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 | |
| 106 #define BYTECODE(name) \ | |
| 107 case BC_##name: \ | |
| 108 TraceInterpreter(code_base, \ | |
| 109 pc, \ | |
| 110 static_cast<int>(backtrack_sp - backtrack_stack_base), \ | |
| 111 current, \ | |
| 112 current_char, \ | |
| 113 BC_##name##_LENGTH, \ | |
| 114 #name); | |
| 115 #else | |
| 116 #define BYTECODE(name) \ | |
| 117 case BC_##name: | |
| 118 #endif | |
| 119 | |
| 120 | |
| 121 static int32_t Load32Aligned(const byte* pc) { | |
| 122 DCHECK((reinterpret_cast<intptr_t>(pc) & 3) == 0); | |
| 123 return *reinterpret_cast<const int32_t *>(pc); | |
| 124 } | |
| 125 | |
| 126 | |
| 127 static int32_t Load16Aligned(const byte* pc) { | |
| 128 DCHECK((reinterpret_cast<intptr_t>(pc) & 1) == 0); | |
| 129 return *reinterpret_cast<const uint16_t *>(pc); | |
| 130 } | |
| 131 | |
| 132 | |
| 133 // A simple abstraction over the backtracking stack used by the interpreter. | |
| 134 // This backtracking stack does not grow automatically, but it ensures that the | |
| 135 // the memory held by the stack is released or remembered in a cache if the | |
| 136 // matching terminates. | |
| 137 class BacktrackStack { | |
| 138 public: | |
| 139 BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); } | |
| 140 | |
| 141 ~BacktrackStack() { | |
| 142 DeleteArray(data_); | |
| 143 } | |
| 144 | |
| 145 int* data() const { return data_; } | |
| 146 | |
| 147 int max_size() const { return kBacktrackStackSize; } | |
| 148 | |
| 149 private: | |
| 150 static const int kBacktrackStackSize = 10000; | |
| 151 | |
| 152 int* data_; | |
| 153 | |
| 154 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); | |
| 155 }; | |
| 156 | |
| 157 | |
| 158 template <typename Char> | |
| 159 static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, | |
| 160 const byte* code_base, | |
| 161 Vector<const Char> subject, | |
| 162 int* registers, | |
| 163 int current, | |
| 164 uint32_t current_char) { | |
| 165 const byte* pc = code_base; | |
| 166 // BacktrackStack ensures that the memory allocated for the backtracking stack | |
| 167 // is returned to the system or cached if there is no stack being cached at | |
| 168 // the moment. | |
| 169 BacktrackStack backtrack_stack; | |
| 170 int* backtrack_stack_base = backtrack_stack.data(); | |
| 171 int* backtrack_sp = backtrack_stack_base; | |
| 172 int backtrack_stack_space = backtrack_stack.max_size(); | |
| 173 #ifdef DEBUG | |
| 174 if (FLAG_trace_regexp_bytecodes) { | |
| 175 PrintF("\n\nStart bytecode interpreter\n\n"); | |
| 176 } | |
| 177 #endif | |
| 178 while (true) { | |
| 179 int32_t insn = Load32Aligned(pc); | |
| 180 switch (insn & BYTECODE_MASK) { | |
| 181 BYTECODE(BREAK) | |
| 182 UNREACHABLE(); | |
| 183 return RegExpImpl::RE_FAILURE; | |
| 184 BYTECODE(PUSH_CP) | |
| 185 if (--backtrack_stack_space < 0) { | |
| 186 return RegExpImpl::RE_EXCEPTION; | |
| 187 } | |
| 188 *backtrack_sp++ = current; | |
| 189 pc += BC_PUSH_CP_LENGTH; | |
| 190 break; | |
| 191 BYTECODE(PUSH_BT) | |
| 192 if (--backtrack_stack_space < 0) { | |
| 193 return RegExpImpl::RE_EXCEPTION; | |
| 194 } | |
| 195 *backtrack_sp++ = Load32Aligned(pc + 4); | |
| 196 pc += BC_PUSH_BT_LENGTH; | |
| 197 break; | |
| 198 BYTECODE(PUSH_REGISTER) | |
| 199 if (--backtrack_stack_space < 0) { | |
| 200 return RegExpImpl::RE_EXCEPTION; | |
| 201 } | |
| 202 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; | |
| 203 pc += BC_PUSH_REGISTER_LENGTH; | |
| 204 break; | |
| 205 BYTECODE(SET_REGISTER) | |
| 206 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); | |
| 207 pc += BC_SET_REGISTER_LENGTH; | |
| 208 break; | |
| 209 BYTECODE(ADVANCE_REGISTER) | |
| 210 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); | |
| 211 pc += BC_ADVANCE_REGISTER_LENGTH; | |
| 212 break; | |
| 213 BYTECODE(SET_REGISTER_TO_CP) | |
| 214 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); | |
| 215 pc += BC_SET_REGISTER_TO_CP_LENGTH; | |
| 216 break; | |
| 217 BYTECODE(SET_CP_TO_REGISTER) | |
| 218 current = registers[insn >> BYTECODE_SHIFT]; | |
| 219 pc += BC_SET_CP_TO_REGISTER_LENGTH; | |
| 220 break; | |
| 221 BYTECODE(SET_REGISTER_TO_SP) | |
| 222 registers[insn >> BYTECODE_SHIFT] = | |
| 223 static_cast<int>(backtrack_sp - backtrack_stack_base); | |
| 224 pc += BC_SET_REGISTER_TO_SP_LENGTH; | |
| 225 break; | |
| 226 BYTECODE(SET_SP_TO_REGISTER) | |
| 227 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT]; | |
| 228 backtrack_stack_space = backtrack_stack.max_size() - | |
| 229 static_cast<int>(backtrack_sp - backtrack_stack_base); | |
| 230 pc += BC_SET_SP_TO_REGISTER_LENGTH; | |
| 231 break; | |
| 232 BYTECODE(POP_CP) | |
| 233 backtrack_stack_space++; | |
| 234 --backtrack_sp; | |
| 235 current = *backtrack_sp; | |
| 236 pc += BC_POP_CP_LENGTH; | |
| 237 break; | |
| 238 BYTECODE(POP_BT) | |
| 239 backtrack_stack_space++; | |
| 240 --backtrack_sp; | |
| 241 pc = code_base + *backtrack_sp; | |
| 242 break; | |
| 243 BYTECODE(POP_REGISTER) | |
| 244 backtrack_stack_space++; | |
| 245 --backtrack_sp; | |
| 246 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; | |
| 247 pc += BC_POP_REGISTER_LENGTH; | |
| 248 break; | |
| 249 BYTECODE(FAIL) | |
| 250 return RegExpImpl::RE_FAILURE; | |
| 251 BYTECODE(SUCCEED) | |
| 252 return RegExpImpl::RE_SUCCESS; | |
| 253 BYTECODE(ADVANCE_CP) | |
| 254 current += insn >> BYTECODE_SHIFT; | |
| 255 pc += BC_ADVANCE_CP_LENGTH; | |
| 256 break; | |
| 257 BYTECODE(GOTO) | |
| 258 pc = code_base + Load32Aligned(pc + 4); | |
| 259 break; | |
| 260 BYTECODE(ADVANCE_CP_AND_GOTO) | |
| 261 current += insn >> BYTECODE_SHIFT; | |
| 262 pc = code_base + Load32Aligned(pc + 4); | |
| 263 break; | |
| 264 BYTECODE(CHECK_GREEDY) | |
| 265 if (current == backtrack_sp[-1]) { | |
| 266 backtrack_sp--; | |
| 267 backtrack_stack_space++; | |
| 268 pc = code_base + Load32Aligned(pc + 4); | |
| 269 } else { | |
| 270 pc += BC_CHECK_GREEDY_LENGTH; | |
| 271 } | |
| 272 break; | |
| 273 BYTECODE(LOAD_CURRENT_CHAR) { | |
| 274 int pos = current + (insn >> BYTECODE_SHIFT); | |
| 275 if (pos >= subject.length()) { | |
| 276 pc = code_base + Load32Aligned(pc + 4); | |
| 277 } else { | |
| 278 current_char = subject[pos]; | |
| 279 pc += BC_LOAD_CURRENT_CHAR_LENGTH; | |
| 280 } | |
| 281 break; | |
| 282 } | |
| 283 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { | |
| 284 int pos = current + (insn >> BYTECODE_SHIFT); | |
| 285 current_char = subject[pos]; | |
| 286 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; | |
| 287 break; | |
| 288 } | |
| 289 BYTECODE(LOAD_2_CURRENT_CHARS) { | |
| 290 int pos = current + (insn >> BYTECODE_SHIFT); | |
| 291 if (pos + 2 > subject.length()) { | |
| 292 pc = code_base + Load32Aligned(pc + 4); | |
| 293 } else { | |
| 294 Char next = subject[pos + 1]; | |
| 295 current_char = | |
| 296 (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); | |
| 297 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH; | |
| 298 } | |
| 299 break; | |
| 300 } | |
| 301 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { | |
| 302 int pos = current + (insn >> BYTECODE_SHIFT); | |
| 303 Char next = subject[pos + 1]; | |
| 304 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); | |
| 305 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; | |
| 306 break; | |
| 307 } | |
| 308 BYTECODE(LOAD_4_CURRENT_CHARS) { | |
| 309 DCHECK(sizeof(Char) == 1); | |
| 310 int pos = current + (insn >> BYTECODE_SHIFT); | |
| 311 if (pos + 4 > subject.length()) { | |
| 312 pc = code_base + Load32Aligned(pc + 4); | |
| 313 } else { | |
| 314 Char next1 = subject[pos + 1]; | |
| 315 Char next2 = subject[pos + 2]; | |
| 316 Char next3 = subject[pos + 3]; | |
| 317 current_char = (subject[pos] | | |
| 318 (next1 << 8) | | |
| 319 (next2 << 16) | | |
| 320 (next3 << 24)); | |
| 321 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH; | |
| 322 } | |
| 323 break; | |
| 324 } | |
| 325 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { | |
| 326 DCHECK(sizeof(Char) == 1); | |
| 327 int pos = current + (insn >> BYTECODE_SHIFT); | |
| 328 Char next1 = subject[pos + 1]; | |
| 329 Char next2 = subject[pos + 2]; | |
| 330 Char next3 = subject[pos + 3]; | |
| 331 current_char = (subject[pos] | | |
| 332 (next1 << 8) | | |
| 333 (next2 << 16) | | |
| 334 (next3 << 24)); | |
| 335 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH; | |
| 336 break; | |
| 337 } | |
| 338 BYTECODE(CHECK_4_CHARS) { | |
| 339 uint32_t c = Load32Aligned(pc + 4); | |
| 340 if (c == current_char) { | |
| 341 pc = code_base + Load32Aligned(pc + 8); | |
| 342 } else { | |
| 343 pc += BC_CHECK_4_CHARS_LENGTH; | |
| 344 } | |
| 345 break; | |
| 346 } | |
| 347 BYTECODE(CHECK_CHAR) { | |
| 348 uint32_t c = (insn >> BYTECODE_SHIFT); | |
| 349 if (c == current_char) { | |
| 350 pc = code_base + Load32Aligned(pc + 4); | |
| 351 } else { | |
| 352 pc += BC_CHECK_CHAR_LENGTH; | |
| 353 } | |
| 354 break; | |
| 355 } | |
| 356 BYTECODE(CHECK_NOT_4_CHARS) { | |
| 357 uint32_t c = Load32Aligned(pc + 4); | |
| 358 if (c != current_char) { | |
| 359 pc = code_base + Load32Aligned(pc + 8); | |
| 360 } else { | |
| 361 pc += BC_CHECK_NOT_4_CHARS_LENGTH; | |
| 362 } | |
| 363 break; | |
| 364 } | |
| 365 BYTECODE(CHECK_NOT_CHAR) { | |
| 366 uint32_t c = (insn >> BYTECODE_SHIFT); | |
| 367 if (c != current_char) { | |
| 368 pc = code_base + Load32Aligned(pc + 4); | |
| 369 } else { | |
| 370 pc += BC_CHECK_NOT_CHAR_LENGTH; | |
| 371 } | |
| 372 break; | |
| 373 } | |
| 374 BYTECODE(AND_CHECK_4_CHARS) { | |
| 375 uint32_t c = Load32Aligned(pc + 4); | |
| 376 if (c == (current_char & Load32Aligned(pc + 8))) { | |
| 377 pc = code_base + Load32Aligned(pc + 12); | |
| 378 } else { | |
| 379 pc += BC_AND_CHECK_4_CHARS_LENGTH; | |
| 380 } | |
| 381 break; | |
| 382 } | |
| 383 BYTECODE(AND_CHECK_CHAR) { | |
| 384 uint32_t c = (insn >> BYTECODE_SHIFT); | |
| 385 if (c == (current_char & Load32Aligned(pc + 4))) { | |
| 386 pc = code_base + Load32Aligned(pc + 8); | |
| 387 } else { | |
| 388 pc += BC_AND_CHECK_CHAR_LENGTH; | |
| 389 } | |
| 390 break; | |
| 391 } | |
| 392 BYTECODE(AND_CHECK_NOT_4_CHARS) { | |
| 393 uint32_t c = Load32Aligned(pc + 4); | |
| 394 if (c != (current_char & Load32Aligned(pc + 8))) { | |
| 395 pc = code_base + Load32Aligned(pc + 12); | |
| 396 } else { | |
| 397 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; | |
| 398 } | |
| 399 break; | |
| 400 } | |
| 401 BYTECODE(AND_CHECK_NOT_CHAR) { | |
| 402 uint32_t c = (insn >> BYTECODE_SHIFT); | |
| 403 if (c != (current_char & Load32Aligned(pc + 4))) { | |
| 404 pc = code_base + Load32Aligned(pc + 8); | |
| 405 } else { | |
| 406 pc += BC_AND_CHECK_NOT_CHAR_LENGTH; | |
| 407 } | |
| 408 break; | |
| 409 } | |
| 410 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { | |
| 411 uint32_t c = (insn >> BYTECODE_SHIFT); | |
| 412 uint32_t minus = Load16Aligned(pc + 4); | |
| 413 uint32_t mask = Load16Aligned(pc + 6); | |
| 414 if (c != ((current_char - minus) & mask)) { | |
| 415 pc = code_base + Load32Aligned(pc + 8); | |
| 416 } else { | |
| 417 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; | |
| 418 } | |
| 419 break; | |
| 420 } | |
| 421 BYTECODE(CHECK_CHAR_IN_RANGE) { | |
| 422 uint32_t from = Load16Aligned(pc + 4); | |
| 423 uint32_t to = Load16Aligned(pc + 6); | |
| 424 if (from <= current_char && current_char <= to) { | |
| 425 pc = code_base + Load32Aligned(pc + 8); | |
| 426 } else { | |
| 427 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH; | |
| 428 } | |
| 429 break; | |
| 430 } | |
| 431 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { | |
| 432 uint32_t from = Load16Aligned(pc + 4); | |
| 433 uint32_t to = Load16Aligned(pc + 6); | |
| 434 if (from > current_char || current_char > to) { | |
| 435 pc = code_base + Load32Aligned(pc + 8); | |
| 436 } else { | |
| 437 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH; | |
| 438 } | |
| 439 break; | |
| 440 } | |
| 441 BYTECODE(CHECK_BIT_IN_TABLE) { | |
| 442 int mask = RegExpMacroAssembler::kTableMask; | |
| 443 byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; | |
| 444 int bit = (current_char & (kBitsPerByte - 1)); | |
| 445 if ((b & (1 << bit)) != 0) { | |
| 446 pc = code_base + Load32Aligned(pc + 4); | |
| 447 } else { | |
| 448 pc += BC_CHECK_BIT_IN_TABLE_LENGTH; | |
| 449 } | |
| 450 break; | |
| 451 } | |
| 452 BYTECODE(CHECK_LT) { | |
| 453 uint32_t limit = (insn >> BYTECODE_SHIFT); | |
| 454 if (current_char < limit) { | |
| 455 pc = code_base + Load32Aligned(pc + 4); | |
| 456 } else { | |
| 457 pc += BC_CHECK_LT_LENGTH; | |
| 458 } | |
| 459 break; | |
| 460 } | |
| 461 BYTECODE(CHECK_GT) { | |
| 462 uint32_t limit = (insn >> BYTECODE_SHIFT); | |
| 463 if (current_char > limit) { | |
| 464 pc = code_base + Load32Aligned(pc + 4); | |
| 465 } else { | |
| 466 pc += BC_CHECK_GT_LENGTH; | |
| 467 } | |
| 468 break; | |
| 469 } | |
| 470 BYTECODE(CHECK_REGISTER_LT) | |
| 471 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { | |
| 472 pc = code_base + Load32Aligned(pc + 8); | |
| 473 } else { | |
| 474 pc += BC_CHECK_REGISTER_LT_LENGTH; | |
| 475 } | |
| 476 break; | |
| 477 BYTECODE(CHECK_REGISTER_GE) | |
| 478 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { | |
| 479 pc = code_base + Load32Aligned(pc + 8); | |
| 480 } else { | |
| 481 pc += BC_CHECK_REGISTER_GE_LENGTH; | |
| 482 } | |
| 483 break; | |
| 484 BYTECODE(CHECK_REGISTER_EQ_POS) | |
| 485 if (registers[insn >> BYTECODE_SHIFT] == current) { | |
| 486 pc = code_base + Load32Aligned(pc + 4); | |
| 487 } else { | |
| 488 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; | |
| 489 } | |
| 490 break; | |
| 491 BYTECODE(CHECK_NOT_REGS_EQUAL) | |
| 492 if (registers[insn >> BYTECODE_SHIFT] == | |
| 493 registers[Load32Aligned(pc + 4)]) { | |
| 494 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; | |
| 495 } else { | |
| 496 pc = code_base + Load32Aligned(pc + 8); | |
| 497 } | |
| 498 break; | |
| 499 BYTECODE(CHECK_NOT_BACK_REF) { | |
| 500 int from = registers[insn >> BYTECODE_SHIFT]; | |
| 501 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; | |
| 502 if (from < 0 || len <= 0) { | |
| 503 pc += BC_CHECK_NOT_BACK_REF_LENGTH; | |
| 504 break; | |
| 505 } | |
| 506 if (current + len > subject.length()) { | |
| 507 pc = code_base + Load32Aligned(pc + 4); | |
| 508 break; | |
| 509 } else { | |
| 510 int i; | |
| 511 for (i = 0; i < len; i++) { | |
| 512 if (subject[from + i] != subject[current + i]) { | |
| 513 pc = code_base + Load32Aligned(pc + 4); | |
| 514 break; | |
| 515 } | |
| 516 } | |
| 517 if (i < len) break; | |
| 518 current += len; | |
| 519 } | |
| 520 pc += BC_CHECK_NOT_BACK_REF_LENGTH; | |
| 521 break; | |
| 522 } | |
| 523 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { | |
| 524 int from = registers[insn >> BYTECODE_SHIFT]; | |
| 525 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; | |
| 526 if (from < 0 || len <= 0) { | |
| 527 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; | |
| 528 break; | |
| 529 } | |
| 530 if (current + len > subject.length()) { | |
| 531 pc = code_base + Load32Aligned(pc + 4); | |
| 532 break; | |
| 533 } else { | |
| 534 if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), | |
| 535 from, current, len, subject)) { | |
| 536 current += len; | |
| 537 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; | |
| 538 } else { | |
| 539 pc = code_base + Load32Aligned(pc + 4); | |
| 540 } | |
| 541 } | |
| 542 break; | |
| 543 } | |
| 544 BYTECODE(CHECK_AT_START) | |
| 545 if (current == 0) { | |
| 546 pc = code_base + Load32Aligned(pc + 4); | |
| 547 } else { | |
| 548 pc += BC_CHECK_AT_START_LENGTH; | |
| 549 } | |
| 550 break; | |
| 551 BYTECODE(CHECK_NOT_AT_START) | |
| 552 if (current == 0) { | |
| 553 pc += BC_CHECK_NOT_AT_START_LENGTH; | |
| 554 } else { | |
| 555 pc = code_base + Load32Aligned(pc + 4); | |
| 556 } | |
| 557 break; | |
| 558 BYTECODE(SET_CURRENT_POSITION_FROM_END) { | |
| 559 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT; | |
| 560 if (subject.length() - current > by) { | |
| 561 current = subject.length() - by; | |
| 562 current_char = subject[current - 1]; | |
| 563 } | |
| 564 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; | |
| 565 break; | |
| 566 } | |
| 567 default: | |
| 568 UNREACHABLE(); | |
| 569 break; | |
| 570 } | |
| 571 } | |
| 572 } | |
| 573 | |
| 574 | |
| 575 RegExpImpl::IrregexpResult IrregexpInterpreter::Match( | |
| 576 Isolate* isolate, | |
| 577 Handle<ByteArray> code_array, | |
| 578 Handle<String> subject, | |
| 579 int* registers, | |
| 580 int start_position) { | |
| 581 DCHECK(subject->IsFlat()); | |
| 582 | |
| 583 DisallowHeapAllocation no_gc; | |
| 584 const byte* code_base = code_array->GetDataStartAddress(); | |
| 585 uc16 previous_char = '\n'; | |
| 586 String::FlatContent subject_content = subject->GetFlatContent(); | |
| 587 if (subject_content.IsOneByte()) { | |
| 588 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); | |
| 589 if (start_position != 0) previous_char = subject_vector[start_position - 1]; | |
| 590 return RawMatch(isolate, | |
| 591 code_base, | |
| 592 subject_vector, | |
| 593 registers, | |
| 594 start_position, | |
| 595 previous_char); | |
| 596 } else { | |
| 597 DCHECK(subject_content.IsTwoByte()); | |
| 598 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | |
| 599 if (start_position != 0) previous_char = subject_vector[start_position - 1]; | |
| 600 return RawMatch(isolate, | |
| 601 code_base, | |
| 602 subject_vector, | |
| 603 registers, | |
| 604 start_position, | |
| 605 previous_char); | |
| 606 } | |
| 607 } | |
| 608 | |
| 609 } // namespace internal | |
| 610 } // namespace v8 | |
| OLD | NEW |