| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 187 matching lines...) |
| 198 ASSERT(!result.is_null() || Top::has_pending_exception()); | 198 ASSERT(!result.is_null() || Top::has_pending_exception()); |
| 199 return result; | 199 return result; |
| 200 } | 200 } |
| 201 default: | 201 default: |
| 202 UNREACHABLE(); | 202 UNREACHABLE(); |
| 203 return Handle<Object>::null(); | 203 return Handle<Object>::null(); |
| 204 } | 204 } |
| 205 } | 205 } |
| 206 | 206 |
| 207 | 207 |
| 208 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | |
| 209 Handle<String> subject, | |
| 210 Handle<JSArray> last_match_info) { | |
| 211 switch (regexp->TypeTag()) { | |
| 212 case JSRegExp::ATOM: | |
| 213 return AtomExecGlobal(regexp, subject, last_match_info); | |
| 214 case JSRegExp::IRREGEXP: { | |
| 215 Handle<Object> result = | |
| 216 IrregexpExecGlobal(regexp, subject, last_match_info); | |
| 217 ASSERT(!result.is_null() || Top::has_pending_exception()); | |
| 218 return result; | |
| 219 } | |
| 220 default: | |
| 221 UNREACHABLE(); | |
| 222 return Handle<Object>::null(); | |
| 223 } | |
| 224 } | |
| 225 | |
| 226 | |
| 227 // RegExp Atom implementation: Simple string search using indexOf. | 208 // RegExp Atom implementation: Simple string search using indexOf. |
| 228 | 209 |
| 229 | 210 |
| 230 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 211 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| 231 Handle<String> pattern, | 212 Handle<String> pattern, |
| 232 JSRegExp::Flags flags, | 213 JSRegExp::Flags flags, |
| 233 Handle<String> match_pattern) { | 214 Handle<String> match_pattern) { |
| 234 Factory::SetRegExpAtomData(re, | 215 Factory::SetRegExpAtomData(re, |
| 235 JSRegExp::ATOM, | 216 JSRegExp::ATOM, |
| 236 pattern, | 217 pattern, |
| (...skipping 29 matching lines...) |
| 266 | 247 |
| 267 { | 248 { |
| 268 NoHandleAllocation no_handles; | 249 NoHandleAllocation no_handles; |
| 269 FixedArray* array = last_match_info->elements(); | 250 FixedArray* array = last_match_info->elements(); |
| 270 SetAtomLastCapture(array, *subject, value, value + needle->length()); | 251 SetAtomLastCapture(array, *subject, value, value + needle->length()); |
| 271 } | 252 } |
| 272 return last_match_info; | 253 return last_match_info; |
| 273 } | 254 } |
| 274 | 255 |
| 275 | 256 |
| 276 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, | |
| 277 Handle<String> subject, | |
| 278 Handle<JSArray> last_match_info) { | |
| 279 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); | |
| 280 ASSERT(last_match_info->HasFastElements()); | |
| 281 Handle<JSArray> result = Factory::NewJSArray(1); | |
| 282 int index = 0; | |
| 283 int match_count = 0; | |
| 284 int subject_length = subject->length(); | |
| 285 int needle_length = needle->length(); | |
| 286 int last_value = -1; | |
| 287 while (true) { | |
| 288 HandleScope scope; | |
| 289 int value = -1; | |
| 290 if (index + needle_length <= subject_length) { | |
| 291 value = Runtime::StringMatch(subject, needle, index); | |
| 292 } | |
| 293 if (value == -1) { | |
| 294 if (last_value != -1) { | |
| 295 Handle<FixedArray> array(last_match_info->elements()); | |
| 296 SetAtomLastCapture(*array, | |
| 297 *subject, | |
| 298 last_value, | |
| 299 last_value + needle->length()); | |
| 300 } | |
| 301 break; | |
| 302 } | |
| 303 | |
| 304 int end = value + needle_length; | |
| 305 | |
| 306 // Create an array that looks like the static last_match_info array | |
| 307 // that is attached to the global RegExp object. We will be returning | |
| 308 // an array of these. | |
| 309 Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); | |
| 310 SetLastCaptureCount(*array, 2); | |
| 311 // Ignore subject and input fields. | |
| 312 SetCapture(*array, 0, value); | |
| 313 SetCapture(*array, 1, end); | |
| 314 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); | |
| 315 SetElement(result, match_count, pair); | |
| 316 match_count++; | |
| 317 index = end; | |
| 318 if (needle_length == 0) index++; | |
| 319 last_value = value; | |
| 320 } | |
| 321 return result; | |
| 322 } | |
| 323 | |
| 324 | |
| 325 // Irregexp implementation. | 257 // Irregexp implementation. |
| 326 | 258 |
| 327 | 259 |
| 328 // Ensures that the regexp object contains a compiled version of the | 260 // Ensures that the regexp object contains a compiled version of the |
| 329 // source for either ASCII or non-ASCII strings. | 261 // source for either ASCII or non-ASCII strings. |
| 330 // If the compiled version doesn't already exist, it is compiled | 262 // If the compiled version doesn't already exist, it is compiled |
| 331 // from the source pattern. | 263 // from the source pattern. |
| 332 // If compilation fails, an exception is thrown and this function | 264 // If compilation fails, an exception is thrown and this function |
| 333 // returns false. | 265 // returns false. |
| 334 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, | 266 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) { |
| 335 bool is_ascii) { | |
| 336 int index; | 267 int index; |
| 337 if (is_ascii) { | 268 if (is_ascii) { |
| 338 index = JSRegExp::kIrregexpASCIICodeIndex; | 269 index = JSRegExp::kIrregexpASCIICodeIndex; |
| 339 } else { | 270 } else { |
| 340 index = JSRegExp::kIrregexpUC16CodeIndex; | 271 index = JSRegExp::kIrregexpUC16CodeIndex; |
| 341 } | 272 } |
| 342 Object* entry = re->DataAt(index); | 273 Object* entry = re->DataAt(index); |
| 343 if (!entry->IsTheHole()) { | 274 if (!entry->IsTheHole()) { |
| 344 // A value has already been compiled. | 275 // A value has already been compiled. |
| 345 if (entry->IsJSObject()) { | 276 if (entry->IsJSObject()) { |
| (...skipping 107 matching lines...) |
| 453 int capture_count) { | 384 int capture_count) { |
| 454 // Initialize compiled code entries to null. | 385 // Initialize compiled code entries to null. |
| 455 Factory::SetRegExpIrregexpData(re, | 386 Factory::SetRegExpIrregexpData(re, |
| 456 JSRegExp::IRREGEXP, | 387 JSRegExp::IRREGEXP, |
| 457 pattern, | 388 pattern, |
| 458 flags, | 389 flags, |
| 459 capture_count); | 390 capture_count); |
| 460 } | 391 } |
| 461 | 392 |
| 462 | 393 |
| 463 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | 394 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| 464 Handle<String> subject, | 395 Handle<String> subject, |
| 465 int index, | 396 int previous_index, |
| 466 Handle<JSArray> last_match_info) { | 397 Handle<JSArray> last_match_info) { |
| 467 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 398 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
| 468 | 399 |
| 469 // Prepare space for the return values. | 400 // Prepare space for the return values. |
| 470 int number_of_capture_registers = | 401 int number_of_capture_registers = |
| 471 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 402 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| 472 OffsetsVector offsets(number_of_capture_registers); | 403 OffsetsVector offsets(number_of_capture_registers); |
| 473 | 404 |
| 474 int previous_index = index; | |
| 475 | |
| 476 #ifdef DEBUG | 405 #ifdef DEBUG |
| 477 if (FLAG_trace_regexp_bytecodes) { | 406 if (FLAG_trace_regexp_bytecodes) { |
| 478 String* pattern = regexp->Pattern(); | 407 String* pattern = jsregexp->Pattern(); |
| 479 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 408 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 480 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 409 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 481 } | 410 } |
| 482 #endif | 411 #endif |
| 483 | 412 |
| 484 if (!subject->IsFlat()) { | 413 if (!subject->IsFlat()) { |
| 485 FlattenString(subject); | 414 FlattenString(subject); |
| 486 } | 415 } |
| 487 | 416 |
| 488 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | 417 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); |
| 489 | 418 |
| 490 return IrregexpExecOnce(regexp, | 419 int* offsets_vector = offsets.vector(); |
| 491 number_of_capture_registers, | 420 int offsets_vector_length = offsets.length(); |
| 492 last_match_info, | 421 bool rc; |
| 493 subject, | |
| 494 previous_index, | |
| 495 offsets.vector(), | |
| 496 offsets.length()); | |
| 497 } | |
| 498 | 422 |
| 499 | 423 // Dispatch to the correct RegExp implementation. |
| 500 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | |
| 501 Handle<String> subject, | |
| 502 Handle<JSArray> last_match_info) { | |
| 503 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | |
| 504 | |
| 505 // Prepare space for the return values. | |
| 506 int number_of_capture_registers = | |
| 507 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | |
| 508 OffsetsVector offsets(number_of_capture_registers); | |
| 509 | |
| 510 int previous_index = 0; | |
| 511 | |
| 512 Handle<JSArray> result = Factory::NewJSArray(0); | |
| 513 int result_length = 0; | |
| 514 Handle<Object> matches; | |
| 515 | |
| 516 if (!subject->IsFlat()) { | |
| 517 FlattenString(subject); | |
| 518 } | |
| 519 | |
| 520 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
| 521 | |
| 522 while (true) { | |
| 523 if (previous_index > subject->length() || previous_index < 0) { | |
| 524 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | |
| 525 // string length, there is no match. | |
| 526 return result; | |
| 527 } else { | |
| 528 #ifdef DEBUG | |
| 529 if (FLAG_trace_regexp_bytecodes) { | |
| 530 String* pattern = regexp->Pattern(); | |
| 531 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | |
| 532 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | |
| 533 } | |
| 534 #endif | |
| 535 HandleScope scope; | |
| 536 matches = IrregexpExecOnce(regexp, | |
| 537 number_of_capture_registers, | |
| 538 last_match_info, | |
| 539 subject, | |
| 540 previous_index, | |
| 541 offsets.vector(), | |
| 542 offsets.length()); | |
| 543 | |
| 544 if (matches.is_null()) { | |
| 545 ASSERT(Top::has_pending_exception()); | |
| 546 return matches; | |
| 547 } | |
| 548 | |
| 549 if (matches->IsJSArray()) { | |
| 550 // Create an array that looks like the static last_match_info array | |
| 551 // that is attached to the global RegExp object. We will be returning | |
| 552 // an array of these. | |
| 553 int match_length = kFirstCapture + number_of_capture_registers; | |
| 554 Handle<JSArray> latest_match = | |
| 555 Factory::NewJSArray(match_length); | |
| 556 | |
| 557 AssertNoAllocation no_allocation; | |
| 558 FixedArray* match_array = JSArray::cast(*matches)->elements(); | |
| 559 match_array->CopyTo(0, | |
| 560 latest_match->elements(), | |
| 561 0, | |
| 562 match_length); | |
| 563 SetElement(result, result_length, latest_match); | |
| 564 result_length++; | |
| 565 previous_index = GetCapture(match_array, 1); | |
| 566 if (GetCapture(match_array, 0) == previous_index) { | |
| 567 previous_index++; | |
| 568 } | |
| 569 } else { | |
| 570 ASSERT(matches->IsNull()); | |
| 571 return result; | |
| 572 } | |
| 573 } | |
| 574 } | |
| 575 } | |
| 576 | |
| 577 | |
| 578 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp, | |
| 579 int number_of_capture_registers, | |
| 580 Handle<JSArray> last_match_info, | |
| 581 Handle<String> subject, | |
| 582 int previous_index, | |
| 583 int* offsets_vector, | |
| 584 int offsets_vector_length) { | |
| 585 ASSERT(subject->IsFlat()); | |
| 586 bool rc; | |
| 587 | 424 |
| 588 Handle<String> original_subject = subject; | 425 Handle<String> original_subject = subject; |
| 589 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); | 426 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); |
| 590 if (UseNativeRegexp()) { | 427 if (UseNativeRegexp()) { |
| 591 #ifdef ARM | 428 #ifdef ARM |
| 592 UNREACHABLE(); | 429 UNREACHABLE(); |
| 593 #else | 430 #else |
| 594 RegExpMacroAssemblerIA32::Result res; | 431 RegExpMacroAssemblerIA32::Result res; |
| 595 do { | 432 do { |
| 596 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | 433 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); |
| (...skipping 27 matching lines...) |
| 624 offsets_vector[i] = -1; | 461 offsets_vector[i] = -1; |
| 625 } | 462 } |
| 626 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); | 463 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); |
| 627 | 464 |
| 628 rc = IrregexpInterpreter::Match(byte_codes, | 465 rc = IrregexpInterpreter::Match(byte_codes, |
| 629 subject, | 466 subject, |
| 630 offsets_vector, | 467 offsets_vector, |
| 631 previous_index); | 468 previous_index); |
| 632 } | 469 } |
| 633 | 470 |
| 471 // Handle results from RegExp implementation. |
| 472 |
| 634 if (!rc) { | 473 if (!rc) { |
| 635 return Factory::null_value(); | 474 return Factory::null_value(); |
| 636 } | 475 } |
| 637 | 476 |
| 638 FixedArray* array = last_match_info->elements(); | 477 FixedArray* array = last_match_info->elements(); |
| 639 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | 478 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); |
| 640 // The captures come in (start, end+1) pairs. | 479 // The captures come in (start, end+1) pairs. |
| 641 SetLastCaptureCount(array, number_of_capture_registers); | 480 SetLastCaptureCount(array, number_of_capture_registers); |
| 642 SetLastSubject(array, *original_subject); | 481 SetLastSubject(array, *original_subject); |
| 643 SetLastInput(array, *original_subject); | 482 SetLastInput(array, *original_subject); |
| (...skipping 3972 matching lines...) |
| 4616 EmbeddedVector<byte, 1024> codes; | 4455 EmbeddedVector<byte, 1024> codes; |
| 4617 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4456 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 4618 return compiler.Assemble(&macro_assembler, | 4457 return compiler.Assemble(&macro_assembler, |
| 4619 node, | 4458 node, |
| 4620 data->capture_count, | 4459 data->capture_count, |
| 4621 pattern); | 4460 pattern); |
| 4622 } | 4461 } |
| 4623 | 4462 |
| 4624 | 4463 |
| 4625 }} // namespace v8::internal | 4464 }} // namespace v8::internal |
| OLD | NEW |