Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1585)

Side by Side Diff: src/jsregexp.cc

Issue 146019: Merge two regular expression engine fixes to 1.1 branch. (Closed) Base URL: http://v8.googlecode.com/svn/branches/1.1/
Patch Set: Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/api.cc ('k') | test/mjsunit/regexp-captures.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 382 matching lines...) Expand 10 before | Expand all | Expand 10 after
393 393
394 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, 394 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
395 Handle<String> subject, 395 Handle<String> subject,
396 int previous_index, 396 int previous_index,
397 Handle<JSArray> last_match_info) { 397 Handle<JSArray> last_match_info) {
398 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); 398 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
399 399
400 // Prepare space for the return values. 400 // Prepare space for the return values.
401 int number_of_capture_registers = 401 int number_of_capture_registers =
402 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; 402 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
403 OffsetsVector offsets(number_of_capture_registers);
404 403
405 #ifdef DEBUG 404 #ifdef DEBUG
406 if (FLAG_trace_regexp_bytecodes) { 405 if (FLAG_trace_regexp_bytecodes) {
407 String* pattern = jsregexp->Pattern(); 406 String* pattern = jsregexp->Pattern();
408 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 407 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
409 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 408 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
410 } 409 }
411 #endif 410 #endif
412 411
413 if (!subject->IsFlat()) { 412 if (!subject->IsFlat()) {
414 FlattenString(subject); 413 FlattenString(subject);
415 } 414 }
416 415
417 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); 416 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
418 417
419 int* offsets_vector = offsets.vector();
420 bool rc; 418 bool rc;
419 FixedArray* array;
421 420
422 // Dispatch to the correct RegExp implementation. 421 // Dispatch to the correct RegExp implementation.
423
424 Handle<String> original_subject = subject; 422 Handle<String> original_subject = subject;
425 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); 423 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
426 if (UseNativeRegexp()) { 424 if (UseNativeRegexp()) {
427 #ifdef ARM 425 #ifdef ARM
428 UNREACHABLE(); 426 UNREACHABLE();
429 #else 427 #else
428 OffsetsVector captures(number_of_capture_registers);
429 int* captures_vector = captures.vector();
430 RegExpMacroAssemblerIA32::Result res; 430 RegExpMacroAssemblerIA32::Result res;
431 do { 431 do {
432 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 432 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
433 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { 433 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
434 return Handle<Object>::null(); 434 return Handle<Object>::null();
435 } 435 }
436 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii)); 436 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
437 res = RegExpMacroAssemblerIA32::Match(code, 437 res = RegExpMacroAssemblerIA32::Match(code,
438 subject, 438 subject,
439 offsets_vector, 439 captures_vector,
440 offsets.length(), 440 captures.length(),
441 previous_index); 441 previous_index);
442 // If result is RETRY, the string has changed representation, and we 442 // If result is RETRY, the string has changed representation, and we
443 // must restart from scratch. 443 // must restart from scratch.
444 } while (res == RegExpMacroAssemblerIA32::RETRY); 444 } while (res == RegExpMacroAssemblerIA32::RETRY);
445 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { 445 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
446 ASSERT(Top::has_pending_exception()); 446 ASSERT(Top::has_pending_exception());
447 return Handle<Object>::null(); 447 return Handle<Object>::null();
448 } 448 }
449 ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS 449 ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS
450 || res == RegExpMacroAssemblerIA32::FAILURE); 450 || res == RegExpMacroAssemblerIA32::FAILURE);
451 451
452 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); 452 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
453 if (!rc) return Factory::null_value();
454
455 array = last_match_info->elements();
456 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
457 // The captures come in (start, end+1) pairs.
458 for (int i = 0; i < number_of_capture_registers; i += 2) {
459 SetCapture(array, i, captures_vector[i]);
460 SetCapture(array, i + 1, captures_vector[i + 1]);
461 }
453 #endif 462 #endif
454 } else { 463 } else {
455 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 464 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
456 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { 465 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
457 return Handle<Object>::null(); 466 return Handle<Object>::null();
458 } 467 }
468 // Now that we have done EnsureCompiledIrregexp we can get the number of
469 // registers.
470 int number_of_registers =
471 IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data()));
472 OffsetsVector registers(number_of_registers);
473 int* register_vector = registers.vector();
459 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 474 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
460 offsets_vector[i] = -1; 475 register_vector[i] = -1;
461 } 476 }
462 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); 477 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
463 478
464 rc = IrregexpInterpreter::Match(byte_codes, 479 rc = IrregexpInterpreter::Match(byte_codes,
465 subject, 480 subject,
466 offsets_vector, 481 register_vector,
467 previous_index); 482 previous_index);
483 if (!rc) return Factory::null_value();
484
485 array = last_match_info->elements();
486 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
487 // The captures come in (start, end+1) pairs.
488 for (int i = 0; i < number_of_capture_registers; i += 2) {
489 SetCapture(array, i, register_vector[i]);
490 SetCapture(array, i + 1, register_vector[i + 1]);
491 }
468 } 492 }
469 493
470 // Handle results from RegExp implementation.
471
472 if (!rc) {
473 return Factory::null_value();
474 }
475
476 FixedArray* array = last_match_info->elements();
477 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
478 // The captures come in (start, end+1) pairs.
479 SetLastCaptureCount(array, number_of_capture_registers); 494 SetLastCaptureCount(array, number_of_capture_registers);
480 SetLastSubject(array, *original_subject); 495 SetLastSubject(array, *original_subject);
481 SetLastInput(array, *original_subject); 496 SetLastInput(array, *original_subject);
482 for (int i = 0; i < number_of_capture_registers; i+=2) { 497
483 SetCapture(array, i, offsets_vector[i]);
484 SetCapture(array, i + 1, offsets_vector[i + 1]);
485 }
486 return last_match_info; 498 return last_match_info;
487 } 499 }
488 500
489 501
490 // ------------------------------------------------------------------- 502 // -------------------------------------------------------------------
491 // Implementation of the Irregexp regular expression engine. 503 // Implementation of the Irregexp regular expression engine.
492 // 504 //
493 // The Irregexp regular expression engine is intended to be a complete 505 // The Irregexp regular expression engine is intended to be a complete
494 // implementation of ECMAScript regular expressions. It generates either 506 // implementation of ECMAScript regular expressions. It generates either
495 // bytecodes or native code. 507 // bytecodes or native code.
(...skipping 388 matching lines...) Expand 10 before | Expand all | Expand 10 after
884 896
885 897
886 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, 898 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
887 int max_register, 899 int max_register,
888 OutSet& affected_registers, 900 OutSet& affected_registers,
889 OutSet* registers_to_pop, 901 OutSet* registers_to_pop,
890 OutSet* registers_to_clear) { 902 OutSet* registers_to_clear) {
891 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. 903 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
892 const int push_limit = (assembler->stack_limit_slack() + 1) / 2; 904 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
893 905
906 // Count pushes performed to force a stack limit check occasionally.
907 int pushes = 0;
908
894 for (int reg = 0; reg <= max_register; reg++) { 909 for (int reg = 0; reg <= max_register; reg++) {
895 if (!affected_registers.Get(reg)) { 910 if (!affected_registers.Get(reg)) {
896 continue; 911 continue;
897 } 912 }
898 // Count pushes performed to force a stack limit check occasionally.
899 int pushes = 0;
900 913
901 // The chronologically first deferred action in the trace 914 // The chronologically first deferred action in the trace
902 // is used to infer the action needed to restore a register 915 // is used to infer the action needed to restore a register
903 // to its previous state (or not, if it's safe to ignore it). 916 // to its previous state (or not, if it's safe to ignore it).
904 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; 917 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
905 DeferredActionUndoType undo_action = IGNORE; 918 DeferredActionUndoType undo_action = IGNORE;
906 919
907 int value = 0; 920 int value = 0;
908 bool absolute = false; 921 bool absolute = false;
909 bool clear = false; 922 bool clear = false;
(...skipping 963 matching lines...) Expand 10 before | Expand all | Expand 10 after
1873 } 1886 }
1874 CharacterRange range = ranges->at(first_range); 1887 CharacterRange range = ranges->at(first_range);
1875 uc16 from = range.from(); 1888 uc16 from = range.from();
1876 uc16 to = range.to(); 1889 uc16 to = range.to();
1877 if (to > char_mask) { 1890 if (to > char_mask) {
1878 to = char_mask; 1891 to = char_mask;
1879 } 1892 }
1880 uint32_t differing_bits = (from ^ to); 1893 uint32_t differing_bits = (from ^ to);
1881 // A mask and compare is only perfect if the differing bits form a 1894 // A mask and compare is only perfect if the differing bits form a
1882 // number like 00011111 with one single block of trailing 1s. 1895 // number like 00011111 with one single block of trailing 1s.
1883 if ((differing_bits & (differing_bits + 1)) == 0) { 1896 if ((differing_bits & (differing_bits + 1)) == 0 &&
1897 from + differing_bits == to) {
1884 pos->determines_perfectly = true; 1898 pos->determines_perfectly = true;
1885 } 1899 }
1886 uint32_t common_bits = ~SmearBitsRight(differing_bits); 1900 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1887 uint32_t bits = (from & common_bits); 1901 uint32_t bits = (from & common_bits);
1888 for (int i = first_range + 1; i < ranges->length(); i++) { 1902 for (int i = first_range + 1; i < ranges->length(); i++) {
1889 CharacterRange range = ranges->at(i); 1903 CharacterRange range = ranges->at(i);
1890 uc16 from = range.from(); 1904 uc16 from = range.from();
1891 uc16 to = range.to(); 1905 uc16 to = range.to();
1892 if (from > char_mask) continue; 1906 if (from > char_mask) continue;
1893 if (to > char_mask) to = char_mask; 1907 if (to > char_mask) to = char_mask;
(...skipping 2560 matching lines...) Expand 10 before | Expand all | Expand 10 after
4454 EmbeddedVector<byte, 1024> codes; 4468 EmbeddedVector<byte, 1024> codes;
4455 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4469 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4456 return compiler.Assemble(&macro_assembler, 4470 return compiler.Assemble(&macro_assembler,
4457 node, 4471 node,
4458 data->capture_count, 4472 data->capture_count,
4459 pattern); 4473 pattern);
4460 } 4474 }
4461 4475
4462 4476
4463 }} // namespace v8::internal 4477 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/api.cc ('k') | test/mjsunit/regexp-captures.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698