| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 137 // Parse-tree is a single atom that is equal to the pattern. | 137 // Parse-tree is a single atom that is equal to the pattern. |
| 138 AtomCompile(re, pattern, flags, pattern); | 138 AtomCompile(re, pattern, flags, pattern); |
| 139 } else if (parse_result.tree->IsAtom() && | 139 } else if (parse_result.tree->IsAtom() && |
| 140 !flags.is_ignore_case() && | 140 !flags.is_ignore_case() && |
| 141 parse_result.capture_count == 0) { | 141 parse_result.capture_count == 0) { |
| 142 RegExpAtom* atom = parse_result.tree->AsAtom(); | 142 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 143 Vector<const uc16> atom_pattern = atom->data(); | 143 Vector<const uc16> atom_pattern = atom->data(); |
| 144 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | 144 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| 145 AtomCompile(re, pattern, flags, atom_string); | 145 AtomCompile(re, pattern, flags, atom_string); |
| 146 } else { | 146 } else { |
| 147 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); | 147 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
| 148 } | 148 } |
| 149 ASSERT(re->data()->IsFixedArray()); | 149 ASSERT(re->data()->IsFixedArray()); |
| 150 // Compilation succeeded so the data is set on the regexp | 150 // Compilation succeeded so the data is set on the regexp |
| 151 // and we can store it in the cache. | 151 // and we can store it in the cache. |
| 152 Handle<FixedArray> data(FixedArray::cast(re->data())); | 152 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 153 CompilationCache::PutRegExp(pattern, flags, data); | 153 CompilationCache::PutRegExp(pattern, flags, data); |
| 154 | 154 |
| 155 return re; | 155 return re; |
| 156 } | 156 } |
| 157 | 157 |
| (...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 329 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { | 329 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { |
| 330 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); | 330 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); |
| 331 } | 331 } |
| 332 | 332 |
| 333 | 333 |
| 334 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | 334 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
| 335 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); | 335 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); |
| 336 } | 336 } |
| 337 | 337 |
| 338 | 338 |
| 339 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | 339 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, |
| 340 Handle<String> pattern, | 340 Handle<String> pattern, |
| 341 JSRegExp::Flags flags, | 341 JSRegExp::Flags flags, |
| 342 int capture_count) { | 342 int capture_count) { |
| 343 // Initialize compiled code entries to null. | 343 // Initialize compiled code entries to null. |
| 344 Factory::SetRegExpIrregexpData(re, | 344 Factory::SetRegExpIrregexpData(re, |
| 345 JSRegExp::IRREGEXP, | 345 JSRegExp::IRREGEXP, |
| 346 pattern, | 346 pattern, |
| 347 flags, | 347 flags, |
| 348 capture_count); | 348 capture_count); |
| 349 } | 349 } |
| 350 | 350 |
| 351 | 351 |
| 352 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| 353 Handle<String> subject) { |
| 354 if (!subject->IsFlat()) { |
| 355 FlattenString(subject); |
| 356 } |
| 357 bool is_ascii = subject->IsAsciiRepresentation(); |
| 358 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { |
| 359 return -1; |
| 360 } |
| 361 #ifdef V8_NATIVE_REGEXP |
| 362 // Native regexp only needs room to output captures. Registers are handled |
| 363 // internally. |
| 364 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
| 365 #else // !V8_NATIVE_REGEXP |
| 366 // Byte-code regexp needs space allocated for all its registers. |
| 367 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
| 368 #endif // V8_NATIVE_REGEXP |
| 369 } |
| 370 |
| 371 |
| 372 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, |
| 373 Handle<String> subject, |
| 374 int index, |
| 375 Vector<int> output) { |
| 376 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); |
| 377 |
| 378 ASSERT(index >= 0); |
| 379 ASSERT(index <= subject->length()); |
| 380 ASSERT(subject->IsFlat()); |
| 381 |
| 382 #ifdef V8_NATIVE_REGEXP |
| 383 ASSERT(output.length() >= |
| 384 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| 385 do { |
| 386 bool is_ascii = subject->IsAsciiRepresentation(); |
| 387 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); |
| 388 NativeRegExpMacroAssembler::Result res = |
| 389 NativeRegExpMacroAssembler::Match(code, |
| 390 subject, |
| 391 output.start(), |
| 392 output.length(), |
| 393 index); |
| 394 if (res != NativeRegExpMacroAssembler::RETRY) { |
| 395 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
| 396 Top::has_pending_exception()); |
| 397 STATIC_ASSERT( |
| 398 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
| 399 STATIC_ASSERT( |
| 400 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
| 401 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
| 402 == RE_EXCEPTION); |
| 403 return static_cast<IrregexpResult>(res); |
| 404 } |
| 405 // If result is RETRY, the string has changed representation, and we |
| 406 // must restart from scratch. |
| 407 // In this case, it means we must make sure we are prepared to handle |
| 408 // the, potentially, different subject (the string can switch between |
| 409 // being internal and external, and even between being ASCII and UC16, |
| 410 // but the characters are always the same). |
| 411 IrregexpPrepare(regexp, subject); |
| 412 } while (true); |
| 413 UNREACHABLE(); |
| 414 return RE_EXCEPTION; |
| 415 #else // ndef V8_NATIVE_REGEXP |
| 416 |
| 417 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
| 418 bool is_ascii = subject->IsAsciiRepresentation(); |
| 419 // We must have done EnsureCompiledIrregexp, so we can get the number of |
| 420 // registers. |
| 421 int* register_vector = output.start(); |
| 422 int number_of_capture_registers = |
| 423 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| 424 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| 425 register_vector[i] = -1; |
| 426 } |
| 427 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); |
| 428 |
| 429 if (IrregexpInterpreter::Match(byte_codes, |
| 430 subject, |
| 431 register_vector, |
| 432 index)) { |
| 433 return RE_SUCCESS; |
| 434 } |
| 435 return RE_FAILURE; |
| 436 #endif // ndef V8_NATIVE_REGEXP |
| 437 } |
| 438 |
| 439 |
| 352 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, | 440 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| 353 Handle<String> subject, | 441 Handle<String> subject, |
| 354 int previous_index, | 442 int previous_index, |
| 355 Handle<JSArray> last_match_info) { | 443 Handle<JSArray> last_match_info) { |
| 356 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); | 444 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
| 357 | 445 |
| 358 // Prepare space for the return values. | 446 // Prepare space for the return values. |
| 359 int number_of_capture_registers = | |
| 360 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; | |
| 361 | |
| 362 #ifndef V8_NATIVE_REGEXP | 447 #ifndef V8_NATIVE_REGEXP |
| 363 #ifdef DEBUG | 448 #ifdef DEBUG |
| 364 if (FLAG_trace_regexp_bytecodes) { | 449 if (FLAG_trace_regexp_bytecodes) { |
| 365 String* pattern = jsregexp->Pattern(); | 450 String* pattern = jsregexp->Pattern(); |
| 366 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 451 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 367 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 452 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 368 } | 453 } |
| 369 #endif | 454 #endif |
| 370 #endif | 455 #endif |
| 371 | 456 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
| 372 if (!subject->IsFlat()) { | 457 if (required_registers < 0) { |
| 373 FlattenString(subject); | 458 // Compiling failed with an exception. |
| 374 } | |
| 375 | |
| 376 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
| 377 | |
| 378 Handle<FixedArray> array; | |
| 379 | |
| 380 // Dispatch to the correct RegExp implementation. | |
| 381 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); | |
| 382 | |
| 383 #ifdef V8_NATIVE_REGEXP | |
| 384 | |
| 385 OffsetsVector captures(number_of_capture_registers); | |
| 386 int* captures_vector = captures.vector(); | |
| 387 NativeRegExpMacroAssembler::Result res; | |
| 388 do { | |
| 389 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 390 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { | |
| 391 return Handle<Object>::null(); | |
| 392 } | |
| 393 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii)); | |
| 394 res = NativeRegExpMacroAssembler::Match(code, | |
| 395 subject, | |
| 396 captures_vector, | |
| 397 captures.length(), | |
| 398 previous_index); | |
| 399 // If result is RETRY, the string have changed representation, and we | |
| 400 // must restart from scratch. | |
| 401 } while (res == NativeRegExpMacroAssembler::RETRY); | |
| 402 if (res == NativeRegExpMacroAssembler::EXCEPTION) { | |
| 403 ASSERT(Top::has_pending_exception()); | 459 ASSERT(Top::has_pending_exception()); |
| 404 return Handle<Object>::null(); | 460 return Handle<Object>::null(); |
| 405 } | 461 } |
| 406 ASSERT(res == NativeRegExpMacroAssembler::SUCCESS | |
| 407 || res == NativeRegExpMacroAssembler::FAILURE); | |
| 408 | 462 |
| 409 if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value(); | 463 OffsetsVector registers(required_registers); |
| 410 | 464 |
| 411 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); | 465 IrregexpResult res = IrregexpExecOnce(jsregexp, |
| 412 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | 466 subject, |
| 413 // The captures come in (start, end+1) pairs. | 467 previous_index, |
| 414 for (int i = 0; i < number_of_capture_registers; i += 2) { | 468 Vector<int>(registers.vector(), |
| 415 // Capture values are relative to start_offset only. | 469 registers.length())); |
| 416 // Convert them to be relative to start of string. | 470 if (res == RE_SUCCESS) { |
| 417 if (captures_vector[i] >= 0) { | 471 int capture_register_count = |
| 418 captures_vector[i] += previous_index; | 472 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| 473 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
| 474 AssertNoAllocation no_gc; |
| 475 int* register_vector = registers.vector(); |
| 476 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| 477 for (int i = 0; i < capture_register_count; i += 2) { |
| 478 SetCapture(array, i, register_vector[i]); |
| 479 SetCapture(array, i + 1, register_vector[i + 1]); |
| 419 } | 480 } |
| 420 if (captures_vector[i + 1] >= 0) { | 481 SetLastCaptureCount(array, capture_register_count); |
| 421 captures_vector[i + 1] += previous_index; | 482 SetLastSubject(array, *subject); |
| 422 } | 483 SetLastInput(array, *subject); |
| 423 SetCapture(*array, i, captures_vector[i]); | 484 return last_match_info; |
| 424 SetCapture(*array, i + 1, captures_vector[i + 1]); | |
| 425 } | 485 } |
| 426 | 486 if (res == RE_EXCEPTION) { |
| 427 #else // ! V8_NATIVE_REGEXP | 487 ASSERT(Top::has_pending_exception()); |
| 428 | |
| 429 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 430 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { | |
| 431 return Handle<Object>::null(); | 488 return Handle<Object>::null(); |
| 432 } | 489 } |
| 433 // Now that we have done EnsureCompiledIrregexp we can get the number of | 490 ASSERT(res == RE_FAILURE); |
| 434 // registers. | 491 return Factory::null_value(); |
| 435 int number_of_registers = | |
| 436 IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data())); | |
| 437 OffsetsVector registers(number_of_registers); | |
| 438 int* register_vector = registers.vector(); | |
| 439 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | |
| 440 register_vector[i] = -1; | |
| 441 } | |
| 442 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); | |
| 443 | |
| 444 if (!IrregexpInterpreter::Match(byte_codes, | |
| 445 subject, | |
| 446 register_vector, | |
| 447 previous_index)) { | |
| 448 return Factory::null_value(); | |
| 449 } | |
| 450 | |
| 451 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); | |
| 452 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | |
| 453 // The captures come in (start, end+1) pairs. | |
| 454 for (int i = 0; i < number_of_capture_registers; i += 2) { | |
| 455 SetCapture(*array, i, register_vector[i]); | |
| 456 SetCapture(*array, i + 1, register_vector[i + 1]); | |
| 457 } | |
| 458 | |
| 459 #endif // V8_NATIVE_REGEXP | |
| 460 | |
| 461 SetLastCaptureCount(*array, number_of_capture_registers); | |
| 462 SetLastSubject(*array, *subject); | |
| 463 SetLastInput(*array, *subject); | |
| 464 | |
| 465 return last_match_info; | |
| 466 } | 492 } |
| 467 | 493 |
| 468 | 494 |
| 469 // ------------------------------------------------------------------- | 495 // ------------------------------------------------------------------- |
| 470 // Implementation of the Irregexp regular expression engine. | 496 // Implementation of the Irregexp regular expression engine. |
| 471 // | 497 // |
| 472 // The Irregexp regular expression engine is intended to be a complete | 498 // The Irregexp regular expression engine is intended to be a complete |
| 473 // implementation of ECMAScript regular expressions. It generates either | 499 // implementation of ECMAScript regular expressions. It generates either |
| 474 // bytecodes or native code. | 500 // bytecodes or native code. |
| 475 | 501 |
| (...skipping 4744 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5220 node, | 5246 node, |
| 5221 data->capture_count, | 5247 data->capture_count, |
| 5222 pattern); | 5248 pattern); |
| 5223 } | 5249 } |
| 5224 | 5250 |
| 5225 | 5251 |
| 5226 int OffsetsVector::static_offsets_vector_[ | 5252 int OffsetsVector::static_offsets_vector_[ |
| 5227 OffsetsVector::kStaticOffsetsVectorSize]; | 5253 OffsetsVector::kStaticOffsetsVectorSize]; |
| 5228 | 5254 |
| 5229 }} // namespace v8::internal | 5255 }} // namespace v8::internal |
| OLD | NEW |