| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 22 matching lines...) Expand all Loading... |
| 33 #include "ast.h" | 33 #include "ast.h" |
| 34 #include "execution.h" | 34 #include "execution.h" |
| 35 #include "factory.h" | 35 #include "factory.h" |
| 36 #include "jsregexp-inl.h" | 36 #include "jsregexp-inl.h" |
| 37 #include "platform.h" | 37 #include "platform.h" |
| 38 #include "runtime.h" | 38 #include "runtime.h" |
| 39 #include "top.h" | 39 #include "top.h" |
| 40 #include "compilation-cache.h" | 40 #include "compilation-cache.h" |
| 41 #include "string-stream.h" | 41 #include "string-stream.h" |
| 42 #include "parser.h" | 42 #include "parser.h" |
| 43 #include "assembler-re2k.h" | 43 #include "assembler-irregexp.h" |
| 44 #include "regexp-macro-assembler.h" | 44 #include "regexp-macro-assembler.h" |
| 45 #include "regexp-macro-assembler-re2k.h" | 45 #include "regexp-macro-assembler-irregexp.h" |
| 46 #if defined __arm__ || defined __thumb__ || defined ARM | 46 #if defined __arm__ || defined __thumb__ || defined ARM |
| 47 // include regexp-macro-assembler-arm.h when created. | 47 // include regexp-macro-assembler-arm.h when created. |
| 48 #else // ia32 | 48 #else // ia32 |
| 49 #include "regexp-macro-assembler-ia32.h" | 49 #include "regexp-macro-assembler-ia32.h" |
| 50 #endif | 50 #endif |
| 51 #include "interpreter-re2k.h" | 51 #include "interpreter-irregexp.h" |
| 52 | 52 |
| 53 // Including pcre.h undefines DEBUG to avoid getting debug output from | 53 // Including pcre.h undefines DEBUG to avoid getting debug output from |
| 54 // the JSCRE implementation. Make sure to redefine it in debug mode | 54 // the JSCRE implementation. Make sure to redefine it in debug mode |
| 55 // after having included the header file. | 55 // after having included the header file. |
| 56 #ifdef DEBUG | 56 #ifdef DEBUG |
| 57 #include "third_party/jscre/pcre.h" | 57 #include "third_party/jscre/pcre.h" |
| 58 #define DEBUG | 58 #define DEBUG |
| 59 #else | 59 #else |
| 60 #include "third_party/jscre/pcre.h" | 60 #include "third_party/jscre/pcre.h" |
| 61 #endif | 61 #endif |
| (...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 228 if (parse_result.has_character_escapes) { | 228 if (parse_result.has_character_escapes) { |
| 229 Vector<const uc16> atom_pattern = atom->data(); | 229 Vector<const uc16> atom_pattern = atom->data(); |
| 230 Handle<String> atom_string = | 230 Handle<String> atom_string = |
| 231 Factory::NewStringFromTwoByte(atom_pattern); | 231 Factory::NewStringFromTwoByte(atom_pattern); |
| 232 result = AtomCompile(re, pattern, flags, atom_string); | 232 result = AtomCompile(re, pattern, flags, atom_string); |
| 233 } else { | 233 } else { |
| 234 result = AtomCompile(re, pattern, flags, pattern); | 234 result = AtomCompile(re, pattern, flags, pattern); |
| 235 } | 235 } |
| 236 } else { | 236 } else { |
| 237 RegExpNode* node = NULL; | 237 RegExpNode* node = NULL; |
| 238 Handle<FixedArray> re2k_data = | 238 Handle<FixedArray> irregexp_data = |
| 239 RegExpEngine::Compile(&parse_result, | 239 RegExpEngine::Compile(&parse_result, |
| 240 &node, | 240 &node, |
| 241 flags.is_ignore_case()); | 241 flags.is_ignore_case()); |
| 242 if (re2k_data.is_null()) { | 242 if (irregexp_data.is_null()) { |
| 243 result = JscrePrepare(re, pattern, flags); | 243 result = JscrePrepare(re, pattern, flags); |
| 244 } else { | 244 } else { |
| 245 result = Re2kPrepare(re, pattern, flags, re2k_data); | 245 result = IrregexpPrepare(re, pattern, flags, irregexp_data); |
| 246 } | 246 } |
| 247 } | 247 } |
| 248 Object* data = re->data(); | 248 Object* data = re->data(); |
| 249 if (data->IsFixedArray()) { | 249 if (data->IsFixedArray()) { |
| 250 // If compilation succeeded then the data is set on the regexp | 250 // If compilation succeeded then the data is set on the regexp |
| 251 // and we can store it in the cache. | 251 // and we can store it in the cache. |
| 252 Handle<FixedArray> data(FixedArray::cast(re->data())); | 252 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 253 CompilationCache::PutRegExp(pattern, flags, data); | 253 CompilationCache::PutRegExp(pattern, flags, data); |
| 254 } | 254 } |
| 255 } | 255 } |
| 256 | 256 |
| 257 LOG(RegExpCompileEvent(re, in_cache)); | 257 LOG(RegExpCompileEvent(re, in_cache)); |
| 258 return result; | 258 return result; |
| 259 } | 259 } |
| 260 | 260 |
| 261 | 261 |
| 262 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 262 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| 263 Handle<String> subject, | 263 Handle<String> subject, |
| 264 Handle<Object> index) { | 264 Handle<Object> index) { |
| 265 switch (regexp->TypeTag()) { | 265 switch (regexp->TypeTag()) { |
| 266 case JSRegExp::JSCRE: | 266 case JSRegExp::JSCRE: |
| 267 return JscreExec(regexp, subject, index); | 267 return JscreExec(regexp, subject, index); |
| 268 case JSRegExp::ATOM: | 268 case JSRegExp::ATOM: |
| 269 return AtomExec(regexp, subject, index); | 269 return AtomExec(regexp, subject, index); |
| 270 case JSRegExp::RE2K: | 270 case JSRegExp::IRREGEXP: |
| 271 return Re2kExec(regexp, subject, index); | 271 return IrregexpExec(regexp, subject, index); |
| 272 default: | 272 default: |
| 273 UNREACHABLE(); | 273 UNREACHABLE(); |
| 274 return Handle<Object>(); | 274 return Handle<Object>(); |
| 275 } | 275 } |
| 276 } | 276 } |
| 277 | 277 |
| 278 | 278 |
| 279 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | 279 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| 280 Handle<String> subject) { | 280 Handle<String> subject) { |
| 281 switch (regexp->TypeTag()) { | 281 switch (regexp->TypeTag()) { |
| 282 case JSRegExp::JSCRE: | 282 case JSRegExp::JSCRE: |
| 283 return JscreExecGlobal(regexp, subject); | 283 return JscreExecGlobal(regexp, subject); |
| 284 case JSRegExp::ATOM: | 284 case JSRegExp::ATOM: |
| 285 return AtomExecGlobal(regexp, subject); | 285 return AtomExecGlobal(regexp, subject); |
| 286 case JSRegExp::RE2K: | 286 case JSRegExp::IRREGEXP: |
| 287 return Re2kExecGlobal(regexp, subject); | 287 return IrregexpExecGlobal(regexp, subject); |
| 288 default: | 288 default: |
| 289 UNREACHABLE(); | 289 UNREACHABLE(); |
| 290 return Handle<Object>(); | 290 return Handle<Object>(); |
| 291 } | 291 } |
| 292 } | 292 } |
| 293 | 293 |
| 294 | 294 |
| 295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 295 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| 296 Handle<String> pattern, | 296 Handle<String> pattern, |
| 297 JSRegExp::Flags flags, | 297 JSRegExp::Flags flags, |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 355 | 355 |
| 356 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re, | 356 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re, |
| 357 Handle<String> pattern, | 357 Handle<String> pattern, |
| 358 JSRegExp::Flags flags) { | 358 JSRegExp::Flags flags) { |
| 359 Handle<Object> value(Heap::undefined_value()); | 359 Handle<Object> value(Heap::undefined_value()); |
| 360 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); | 360 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); |
| 361 return re; | 361 return re; |
| 362 } | 362 } |
| 363 | 363 |
| 364 | 364 |
| 365 Handle<Object>RegExpImpl::Re2kPrepare(Handle<JSRegExp> re, | 365 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, |
| 366 Handle<String> pattern, | 366 Handle<String> pattern, |
| 367 JSRegExp::Flags flags, | 367 JSRegExp::Flags flags, |
| 368 Handle<FixedArray> re2k_data) { | 368 Handle<FixedArray> irregexp_data) { |
| 369 Factory::SetRegExpData(re, JSRegExp::RE2K, pattern, flags, re2k_data); | 369 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, irregexp_data); |
| 370 return re; | 370 return re; |
| 371 } | 371 } |
| 372 | 372 |
| 373 | 373 |
| 374 static inline Object* DoCompile(String* pattern, | 374 static inline Object* DoCompile(String* pattern, |
| 375 JSRegExp::Flags flags, | 375 JSRegExp::Flags flags, |
| 376 unsigned* number_of_captures, | 376 unsigned* number_of_captures, |
| 377 const char** error_message, | 377 const char** error_message, |
| 378 JscreRegExp** code) { | 378 JscreRegExp** code) { |
| 379 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() | 379 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 456 | 456 |
| 457 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength); | 457 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength); |
| 458 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures)); | 458 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures)); |
| 459 value->set(kJscreInternalIndex, *internal); | 459 value->set(kJscreInternalIndex, *internal); |
| 460 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); | 460 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); |
| 461 | 461 |
| 462 return re; | 462 return re; |
| 463 } | 463 } |
| 464 | 464 |
| 465 | 465 |
| 466 Handle<Object> RegExpImpl::Re2kExecOnce(Handle<JSRegExp> regexp, | 466 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, |
| 467 int num_captures, | 467 int num_captures, |
| 468 Handle<String> two_byte_subject, | 468 Handle<String> two_byte_subject, |
| 469 int previous_index, | 469 int previous_index, |
| 470 int* offsets_vector, | 470 int* offsets_vector, |
| 471 int offsets_vector_length) { | 471 int offsets_vector_length) { |
| 472 #ifdef DEBUG | 472 #ifdef DEBUG |
| 473 if (FLAG_trace_regexp_bytecodes) { | 473 if (FLAG_trace_regexp_bytecodes) { |
| 474 String* pattern = regexp->Pattern(); | 474 String* pattern = regexp->Pattern(); |
| 475 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 475 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 476 PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString())); | 476 PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString())); |
| 477 } | 477 } |
| 478 #endif | 478 #endif |
| 479 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation()); | 479 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation()); |
| 480 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject))); | 480 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject))); |
| 481 bool rc; | 481 bool rc; |
| 482 { | 482 { |
| 483 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | 483 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
| 484 offsets_vector[i] = -1; | 484 offsets_vector[i] = -1; |
| 485 } | 485 } |
| 486 | 486 |
| 487 LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject)); | 487 LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject)); |
| 488 | 488 |
| 489 FixedArray* re2k = | 489 FixedArray* irregexp = |
| 490 FixedArray::cast(regexp->DataAt(JSRegExp::kRe2kDataIndex)); | 490 FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex)); |
| 491 int tag = Smi::cast(re2k->get(kRe2kImplementationIndex))->value(); | 491 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); |
| 492 | 492 |
| 493 switch (tag) { | 493 switch (tag) { |
| 494 case RegExpMacroAssembler::kIA32Implementation: { | 494 case RegExpMacroAssembler::kIA32Implementation: { |
| 495 Code* code = Code::cast(re2k->get(kRe2kCodeIndex)); | 495 Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex)); |
| 496 SmartPointer<int> captures(NewArray<int>((num_captures + 1) * 2)); | 496 SmartPointer<int> captures(NewArray<int>((num_captures + 1) * 2)); |
| 497 Address start_addr = | 497 Address start_addr = |
| 498 Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress(); | 498 Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress(); |
| 499 int start_offset = | 499 int start_offset = |
| 500 start_addr - reinterpret_cast<Address>(*two_byte_subject); | 500 start_addr - reinterpret_cast<Address>(*two_byte_subject); |
| 501 int end_offset = | 501 int end_offset = |
| 502 start_offset + (two_byte_subject->length() - previous_index) * 2; | 502 start_offset + (two_byte_subject->length() - previous_index) * 2; |
| 503 typedef bool testfunc(String**, int, int, int*); | 503 typedef bool testfunc(String**, int, int, int*); |
| 504 testfunc* test = FUNCTION_CAST<testfunc*>(code->entry()); | 504 testfunc* test = FUNCTION_CAST<testfunc*>(code->entry()); |
| 505 rc = test(two_byte_subject.location(), | 505 rc = test(two_byte_subject.location(), |
| 506 start_offset, | 506 start_offset, |
| 507 end_offset, | 507 end_offset, |
| 508 *captures); | 508 *captures); |
| 509 if (rc) { | 509 if (rc) { |
| 510 // Capture values are relative to start_offset only. | 510 // Capture values are relative to start_offset only. |
| 511 for (int i = 0; i < offsets_vector_length; i++) { | 511 for (int i = 0; i < offsets_vector_length; i++) { |
| 512 if (offsets_vector[i] >= 0) { | 512 if (offsets_vector[i] >= 0) { |
| 513 offsets_vector[i] += previous_index; | 513 offsets_vector[i] += previous_index; |
| 514 } | 514 } |
| 515 } | 515 } |
| 516 } | 516 } |
| 517 break; | 517 break; |
| 518 } | 518 } |
| 519 default: | 519 default: |
| 520 case RegExpMacroAssembler::kARMImplementation: | 520 case RegExpMacroAssembler::kARMImplementation: |
| 521 UNREACHABLE(); | 521 UNREACHABLE(); |
| 522 rc = false; | 522 rc = false; |
| 523 break; | 523 break; |
| 524 case RegExpMacroAssembler::kBytecodeImplementation: { | 524 case RegExpMacroAssembler::kBytecodeImplementation: { |
| 525 Handle<ByteArray> byte_codes = Re2kCode(regexp); | 525 Handle<ByteArray> byte_codes = IrregexpCode(regexp); |
| 526 | 526 |
| 527 rc = Re2kInterpreter::Match(byte_codes, | 527 rc = IrregexpInterpreter::Match(byte_codes, |
| 528 two_byte_subject, | 528 two_byte_subject, |
| 529 offsets_vector, | 529 offsets_vector, |
| 530 previous_index); | 530 previous_index); |
| 531 break; | 531 break; |
| 532 } | 532 } |
| 533 } | 533 } |
| 534 } | 534 } |
| 535 | 535 |
| 536 if (!rc) { | 536 if (!rc) { |
| 537 return Factory::null_value(); | 537 return Factory::null_value(); |
| 538 } | 538 } |
| 539 | 539 |
| 540 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | 540 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 632 int offsets_vector_length_; | 632 int offsets_vector_length_; |
| 633 static const int kStaticOffsetsVectorSize = 50; | 633 static const int kStaticOffsetsVectorSize = 50; |
| 634 static int static_offsets_vector_[kStaticOffsetsVectorSize]; | 634 static int static_offsets_vector_[kStaticOffsetsVectorSize]; |
| 635 }; | 635 }; |
| 636 | 636 |
| 637 | 637 |
| 638 int OffsetsVector::static_offsets_vector_[ | 638 int OffsetsVector::static_offsets_vector_[ |
| 639 OffsetsVector::kStaticOffsetsVectorSize]; | 639 OffsetsVector::kStaticOffsetsVectorSize]; |
| 640 | 640 |
| 641 | 641 |
| 642 Handle<Object> RegExpImpl::Re2kExec(Handle<JSRegExp> regexp, | 642 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
| 643 Handle<String> subject, | 643 Handle<String> subject, |
| 644 Handle<Object> index) { | 644 Handle<Object> index) { |
| 645 ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K); | 645 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 646 ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined()); | 646 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined()); |
| 647 | 647 |
| 648 // Prepare space for the return values. | 648 // Prepare space for the return values. |
| 649 int number_of_registers = Re2kNumberOfRegisters(regexp); | 649 int number_of_registers = IrregexpNumberOfRegisters(regexp); |
| 650 OffsetsVector offsets(number_of_registers); | 650 OffsetsVector offsets(number_of_registers); |
| 651 | 651 |
| 652 int num_captures = Re2kNumberOfCaptures(regexp); | 652 int num_captures = IrregexpNumberOfCaptures(regexp); |
| 653 | 653 |
| 654 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | 654 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
| 655 | 655 |
| 656 Handle<String> subject16 = CachedStringToTwoByte(subject); | 656 Handle<String> subject16 = CachedStringToTwoByte(subject); |
| 657 | 657 |
| 658 Handle<Object> result( | 658 Handle<Object> result( |
| 659 Re2kExecOnce(regexp, | 659 IrregexpExecOnce(regexp, |
| 660 num_captures, | 660 num_captures, |
| 661 subject16, | 661 subject16, |
| 662 previous_index, | 662 previous_index, |
| 663 offsets.vector(), | 663 offsets.vector(), |
| 664 offsets.length())); | 664 offsets.length())); |
| 665 return result; | 665 return result; |
| 666 } | 666 } |
| 667 | 667 |
| 668 | 668 |
| 669 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp, | 669 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp, |
| 670 Handle<String> subject, | 670 Handle<String> subject, |
| 671 Handle<Object> index) { | 671 Handle<Object> index) { |
| 672 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); | 672 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); |
| 673 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { | 673 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { |
| 674 Handle<Object> compile_result = JscreCompile(regexp); | 674 Handle<Object> compile_result = JscreCompile(regexp); |
| 675 if (compile_result->IsException()) return compile_result; | 675 if (compile_result.is_null()) return compile_result; |
| 676 } | 676 } |
| 677 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); | 677 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); |
| 678 | 678 |
| 679 int num_captures = JscreNumberOfCaptures(regexp); | 679 int num_captures = JscreNumberOfCaptures(regexp); |
| 680 | 680 |
| 681 OffsetsVector offsets((num_captures + 1) * 3); | 681 OffsetsVector offsets((num_captures + 1) * 3); |
| 682 | 682 |
| 683 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | 683 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
| 684 | 684 |
| 685 Handle<String> subject16 = CachedStringToTwoByte(subject); | 685 Handle<String> subject16 = CachedStringToTwoByte(subject); |
| 686 | 686 |
| 687 Handle<Object> result(JscreExecOnce(regexp, | 687 Handle<Object> result(JscreExecOnce(regexp, |
| 688 num_captures, | 688 num_captures, |
| 689 subject, | 689 subject, |
| 690 previous_index, | 690 previous_index, |
| 691 subject16->GetTwoByteData(), | 691 subject16->GetTwoByteData(), |
| 692 offsets.vector(), | 692 offsets.vector(), |
| 693 offsets.length())); | 693 offsets.length())); |
| 694 | 694 |
| 695 return result; | 695 return result; |
| 696 } | 696 } |
| 697 | 697 |
| 698 | 698 |
| 699 Handle<Object> RegExpImpl::Re2kExecGlobal(Handle<JSRegExp> regexp, | 699 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
| 700 Handle<String> subject) { | 700 Handle<String> subject) { |
| 701 ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K); | 701 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 702 ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined()); | 702 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined()); |
| 703 | 703 |
| 704 // Prepare space for the return values. | 704 // Prepare space for the return values. |
| 705 int number_of_registers = Re2kNumberOfRegisters(regexp); | 705 int number_of_registers = IrregexpNumberOfRegisters(regexp); |
| 706 OffsetsVector offsets(number_of_registers); | 706 OffsetsVector offsets(number_of_registers); |
| 707 | 707 |
| 708 int previous_index = 0; | 708 int previous_index = 0; |
| 709 | 709 |
| 710 Handle<JSArray> result = Factory::NewJSArray(0); | 710 Handle<JSArray> result = Factory::NewJSArray(0); |
| 711 int i = 0; | 711 int i = 0; |
| 712 Handle<Object> matches; | 712 Handle<Object> matches; |
| 713 | 713 |
| 714 Handle<String> subject16 = CachedStringToTwoByte(subject); | 714 Handle<String> subject16 = CachedStringToTwoByte(subject); |
| 715 | 715 |
| 716 do { | 716 do { |
| 717 if (previous_index > subject->length() || previous_index < 0) { | 717 if (previous_index > subject->length() || previous_index < 0) { |
| 718 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 718 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| 719 // string length, there is no match. | 719 // string length, there is no match. |
| 720 matches = Factory::null_value(); | 720 matches = Factory::null_value(); |
| 721 } else { | 721 } else { |
| 722 matches = Re2kExecOnce(regexp, | 722 matches = IrregexpExecOnce(regexp, |
| 723 Re2kNumberOfCaptures(regexp), | 723 IrregexpNumberOfCaptures(regexp), |
| 724 subject16, | 724 subject16, |
| 725 previous_index, | 725 previous_index, |
| 726 offsets.vector(), | 726 offsets.vector(), |
| 727 offsets.length()); | 727 offsets.length()); |
| 728 | 728 |
| 729 if (matches->IsJSArray()) { | 729 if (matches->IsJSArray()) { |
| 730 SetElement(result, i, matches); | 730 SetElement(result, i, matches); |
| 731 i++; | 731 i++; |
| 732 previous_index = offsets.vector()[1]; | 732 previous_index = offsets.vector()[1]; |
| 733 if (offsets.vector()[0] == offsets.vector()[1]) { | 733 if (offsets.vector()[0] == offsets.vector()[1]) { |
| 734 previous_index++; | 734 previous_index++; |
| 735 } | 735 } |
| 736 } | 736 } |
| 737 } | 737 } |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 808 value(); | 808 value(); |
| 809 } | 809 } |
| 810 | 810 |
| 811 | 811 |
| 812 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) { | 812 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) { |
| 813 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); | 813 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); |
| 814 return ByteArray::cast(value->get(kJscreInternalIndex)); | 814 return ByteArray::cast(value->get(kJscreInternalIndex)); |
| 815 } | 815 } |
| 816 | 816 |
| 817 | 817 |
| 818 int RegExpImpl::Re2kNumberOfCaptures(Handle<JSRegExp> re) { | 818 int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) { |
| 819 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex)); | 819 FixedArray* value = |
| 820 return Smi::cast(value->get(kRe2kNumberOfCapturesIndex))->value(); | 820 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); |
| 821 return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value(); |
| 821 } | 822 } |
| 822 | 823 |
| 823 | 824 |
| 824 int RegExpImpl::Re2kNumberOfRegisters(Handle<JSRegExp> re) { | 825 int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) { |
| 825 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex)); | 826 FixedArray* value = |
| 826 return Smi::cast(value->get(kRe2kNumberOfRegistersIndex))->value(); | 827 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); |
| 828 return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value(); |
| 827 } | 829 } |
| 828 | 830 |
| 829 | 831 |
| 830 Handle<ByteArray> RegExpImpl::Re2kCode(Handle<JSRegExp> re) { | 832 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { |
| 831 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex)); | 833 FixedArray* value = |
| 832 return Handle<ByteArray>(ByteArray::cast(value->get(kRe2kCodeIndex))); | 834 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); |
| 835 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex))); |
| 833 } | 836 } |
| 834 | 837 |
| 835 | 838 |
| 836 // ------------------------------------------------------------------- | 839 // ------------------------------------------------------------------- |
| 837 // New regular expression engine | 840 // New regular expression engine |
| 838 | 841 |
| 839 | 842 |
| 840 void RegExpTree::AppendToText(RegExpText* text) { | 843 void RegExpTree::AppendToText(RegExpText* text) { |
| 841 UNREACHABLE(); | 844 UNREACHABLE(); |
| 842 } | 845 } |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 904 EndNode* accept_; | 907 EndNode* accept_; |
| 905 EndNode* backtrack_; | 908 EndNode* backtrack_; |
| 906 int next_register_; | 909 int next_register_; |
| 907 List<RegExpNode*>* work_list_; | 910 List<RegExpNode*>* work_list_; |
| 908 int recursion_depth_; | 911 int recursion_depth_; |
| 909 RegExpMacroAssembler* macro_assembler_; | 912 RegExpMacroAssembler* macro_assembler_; |
| 910 bool is_case_independent_; | 913 bool is_case_independent_; |
| 911 }; | 914 }; |
| 912 | 915 |
| 913 | 916 |
| 914 // Attempts to compile the regexp using a Regexp2000 code generator. Returns | 917 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 915 // a fixed array or a null handle depending on whether it succeeded. | 918 // a fixed array or a null handle depending on whether it succeeded. |
| 916 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) | 919 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) |
| 917 : next_register_(2 * (capture_count + 1)), | 920 : next_register_(2 * (capture_count + 1)), |
| 918 work_list_(NULL), | 921 work_list_(NULL), |
| 919 recursion_depth_(0), | 922 recursion_depth_(0), |
| 920 is_case_independent_(ignore_case) { | 923 is_case_independent_(ignore_case) { |
| 921 accept_ = new EndNode(EndNode::ACCEPT); | 924 accept_ = new EndNode(EndNode::ACCEPT); |
| 922 backtrack_ = new EndNode(EndNode::BACKTRACK); | 925 backtrack_ = new EndNode(EndNode::BACKTRACK); |
| 923 } | 926 } |
| 924 | 927 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 941 } | 944 } |
| 942 while (!work_list.is_empty()) { | 945 while (!work_list.is_empty()) { |
| 943 if (!work_list.RemoveLast()->GoTo(this)) { | 946 if (!work_list.RemoveLast()->GoTo(this)) { |
| 944 fail.Unuse(); | 947 fail.Unuse(); |
| 945 return Handle<FixedArray>::null(); | 948 return Handle<FixedArray>::null(); |
| 946 } | 949 } |
| 947 } | 950 } |
| 948 macro_assembler->Bind(&fail); | 951 macro_assembler->Bind(&fail); |
| 949 macro_assembler->Fail(); | 952 macro_assembler->Fail(); |
| 950 Handle<FixedArray> array = | 953 Handle<FixedArray> array = |
| 951 Factory::NewFixedArray(RegExpImpl::kRe2kDataLength); | 954 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); |
| 952 array->set(RegExpImpl::kRe2kImplementationIndex, | 955 array->set(RegExpImpl::kIrregexpImplementationIndex, |
| 953 Smi::FromInt(macro_assembler->Implementation())); | 956 Smi::FromInt(macro_assembler->Implementation())); |
| 954 array->set(RegExpImpl::kRe2kNumberOfRegistersIndex, | 957 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, |
| 955 Smi::FromInt(next_register_)); | 958 Smi::FromInt(next_register_)); |
| 956 array->set(RegExpImpl::kRe2kNumberOfCapturesIndex, | 959 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, |
| 957 Smi::FromInt(capture_count)); | 960 Smi::FromInt(capture_count)); |
| 958 Handle<Object> code = macro_assembler->GetCode(); | 961 Handle<Object> code = macro_assembler->GetCode(); |
| 959 array->set(RegExpImpl::kRe2kCodeIndex, *code); | 962 array->set(RegExpImpl::kIrregexpCodeIndex, *code); |
| 960 work_list_ = NULL; | 963 work_list_ = NULL; |
| 961 return array; | 964 return array; |
| 962 } | 965 } |
| 963 | 966 |
| 964 | 967 |
| 965 bool RegExpNode::GoTo(RegExpCompiler* compiler) { | 968 bool RegExpNode::GoTo(RegExpCompiler* compiler) { |
| 966 // TODO(erikcorry): Implement support. | 969 // TODO(erikcorry): Implement support. |
| 967 if (info_.follows_word_interest || | 970 if (info_.follows_word_interest || |
| 968 info_.follows_newline_interest || | 971 info_.follows_newline_interest || |
| 969 info_.follows_start_interest) { | 972 info_.follows_start_interest) { |
| (...skipping 327 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1297 // First, handle straight character matches. | 1300 // First, handle straight character matches. |
| 1298 for (int i = 0; i < element_count; i++) { | 1301 for (int i = 0; i < element_count; i++) { |
| 1299 TextElement elm = elms_->at(i); | 1302 TextElement elm = elms_->at(i); |
| 1300 if (elm.type == TextElement::ATOM) { | 1303 if (elm.type == TextElement::ATOM) { |
| 1301 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1304 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1302 if (!compiler->is_case_independent()) { | 1305 if (!compiler->is_case_independent()) { |
| 1303 macro_assembler->CheckCharacters(quarks, | 1306 macro_assembler->CheckCharacters(quarks, |
| 1304 cp_offset, | 1307 cp_offset, |
| 1305 on_failure_->label()); | 1308 on_failure_->label()); |
| 1306 } else { | 1309 } else { |
| 1307 EmitAtomNonLetters(macro_assembler, elm, quarks, on_failure_->label(), cp_offset); | 1310 EmitAtomNonLetters(macro_assembler, |
| 1311 elm, |
| 1312 quarks, |
| 1313 on_failure_->label(), |
| 1314 cp_offset); |
| 1308 } | 1315 } |
| 1309 cp_offset += quarks.length(); | 1316 cp_offset += quarks.length(); |
| 1310 } else { | 1317 } else { |
| 1311 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 1318 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
| 1312 cp_offset++; | 1319 cp_offset++; |
| 1313 } | 1320 } |
| 1314 } | 1321 } |
| 1315 // Second, handle case independent letter matches if any. | 1322 // Second, handle case independent letter matches if any. |
| 1316 if (compiler->is_case_independent()) { | 1323 if (compiler->is_case_independent()) { |
| 1317 cp_offset = 0; | 1324 cp_offset = 0; |
| 1318 for (int i = 0; i < element_count; i++) { | 1325 for (int i = 0; i < element_count; i++) { |
| 1319 TextElement elm = elms_->at(i); | 1326 TextElement elm = elms_->at(i); |
| 1320 if (elm.type == TextElement::ATOM) { | 1327 if (elm.type == TextElement::ATOM) { |
| 1321 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1328 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1322 EmitAtomLetters(macro_assembler, elm, quarks, on_failure_->label(), cp_offset); | 1329 EmitAtomLetters(macro_assembler, |
| 1330 elm, |
| 1331 quarks, |
| 1332 on_failure_->label(), |
| 1333 cp_offset); |
| 1323 cp_offset += quarks.length(); | 1334 cp_offset += quarks.length(); |
| 1324 } else { | 1335 } else { |
| 1325 cp_offset++; | 1336 cp_offset++; |
| 1326 } | 1337 } |
| 1327 } | 1338 } |
| 1328 } | 1339 } |
| 1329 // If the fast character matches passed then do the character classes. | 1340 // If the fast character matches passed then do the character classes. |
| 1330 cp_offset = 0; | 1341 cp_offset = 0; |
| 1331 for (int i = 0; i < element_count; i++) { | 1342 for (int i = 0; i < element_count; i++) { |
| 1332 TextElement elm = elms_->at(i); | 1343 TextElement elm = elms_->at(i); |
| (...skipping 1219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2552 RegExpQuantifier::kInfinity, | 2563 RegExpQuantifier::kInfinity, |
| 2553 false, | 2564 false, |
| 2554 new RegExpCharacterClass('*'), | 2565 new RegExpCharacterClass('*'), |
| 2555 &compiler, | 2566 &compiler, |
| 2556 captured_body, | 2567 captured_body, |
| 2557 compiler.backtrack()); | 2568 compiler.backtrack()); |
| 2558 if (node_return != NULL) *node_return = node; | 2569 if (node_return != NULL) *node_return = node; |
| 2559 Analysis analysis; | 2570 Analysis analysis; |
| 2560 analysis.EnsureAnalyzed(node); | 2571 analysis.EnsureAnalyzed(node); |
| 2561 | 2572 |
| 2573 if (!FLAG_irregexp) { |
| 2574 return Handle<FixedArray>::null(); |
| 2575 } |
| 2576 |
| 2562 #if !(defined ARM || defined __arm__ || defined __thumb__) | 2577 #if !(defined ARM || defined __arm__ || defined __thumb__) |
| 2563 if (FLAG_re2k_native) { // Flag only checked in IA32 mode. | 2578 if (FLAG_irregexp_native) { // Flag only checked in IA32 mode. |
| 2564 // TODO(lrn) Move compilation to a later point in the life-cycle | 2579 // TODO(lrn) Move compilation to a later point in the life-cycle |
| 2565 // of the RegExp. We don't know the type of input string yet. | 2580 // of the RegExp. We don't know the type of input string yet. |
| 2566 // For now, always assume two-byte strings. | 2581 // For now, always assume two-byte strings. |
| 2567 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, | 2582 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, |
| 2568 (input->capture_count + 1) * 2, | 2583 (input->capture_count + 1) * 2, |
| 2569 ignore_case); | 2584 ignore_case); |
| 2570 return compiler.Assemble(&macro_assembler, | 2585 return compiler.Assemble(&macro_assembler, |
| 2571 node, | 2586 node, |
| 2572 input->capture_count); | 2587 input->capture_count); |
| 2573 } | 2588 } |
| 2574 #endif | 2589 #endif |
| 2575 byte codes[1024]; | 2590 byte codes[1024]; |
| 2576 Re2kAssembler assembler(Vector<byte>(codes, 1024)); | 2591 IrregexpAssembler assembler(Vector<byte>(codes, 1024)); |
| 2577 RegExpMacroAssemblerRe2k macro_assembler(&assembler); | 2592 RegExpMacroAssemblerIrregexp macro_assembler(&assembler); |
| 2578 return compiler.Assemble(&macro_assembler, | 2593 return compiler.Assemble(&macro_assembler, |
| 2579 node, | 2594 node, |
| 2580 input->capture_count); | 2595 input->capture_count); |
| 2581 } | 2596 } |
| 2582 | 2597 |
| 2583 | 2598 |
| 2584 }} // namespace v8::internal | 2599 }} // namespace v8::internal |
| OLD | NEW |