Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(163)

Side by Side Diff: src/jsregexp.cc

Issue 17203: Periodic merge from bleeding_edge to experimental code generator... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/
Patch Set: Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/log.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after
260 "malformed_regexp"); 260 "malformed_regexp");
261 return Handle<Object>::null(); 261 return Handle<Object>::null();
262 } 262 }
263 263
264 if (parse_result.simple && !flags.is_ignore_case()) { 264 if (parse_result.simple && !flags.is_ignore_case()) {
265 // Parse-tree is a single atom that is equal to the pattern. 265 // Parse-tree is a single atom that is equal to the pattern.
266 result = AtomCompile(re, pattern, flags, pattern); 266 result = AtomCompile(re, pattern, flags, pattern);
267 } else if (parse_result.tree->IsAtom() && 267 } else if (parse_result.tree->IsAtom() &&
268 !flags.is_ignore_case() && 268 !flags.is_ignore_case() &&
269 parse_result.capture_count == 0) { 269 parse_result.capture_count == 0) {
270 // TODO(lrn) Accept capture_count > 0 on atoms.
271 RegExpAtom* atom = parse_result.tree->AsAtom(); 270 RegExpAtom* atom = parse_result.tree->AsAtom();
272 Vector<const uc16> atom_pattern = atom->data(); 271 Vector<const uc16> atom_pattern = atom->data();
273 Handle<String> atom_string = 272 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
274 Factory::NewStringFromTwoByte(atom_pattern);
275 result = AtomCompile(re, pattern, flags, atom_string); 273 result = AtomCompile(re, pattern, flags, atom_string);
276 } else if (FLAG_irregexp) { 274 } else if (FLAG_irregexp) {
277 result = IrregexpPrepare(re, pattern, flags); 275 result = IrregexpPrepare(re, pattern, flags);
278 } else { 276 } else {
279 result = JscrePrepare(re, pattern, flags); 277 result = JscrePrepare(re, pattern, flags);
280 } 278 }
281 Object* data = re->data(); 279 Object* data = re->data();
282 if (data->IsFixedArray()) { 280 if (data->IsFixedArray()) {
283 // If compilation succeeded then the data is set on the regexp 281 // If compilation succeeded then the data is set on the regexp
284 // and we can store it in the cache. 282 // and we can store it in the cache.
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
368 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, 366 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
369 Handle<String> subject, 367 Handle<String> subject,
370 Handle<Object> index) { 368 Handle<Object> index) {
371 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); 369 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
372 370
373 uint32_t start_index; 371 uint32_t start_index;
374 if (!Array::IndexFromObject(*index, &start_index)) { 372 if (!Array::IndexFromObject(*index, &start_index)) {
375 return Handle<Smi>(Smi::FromInt(-1)); 373 return Handle<Smi>(Smi::FromInt(-1));
376 } 374 }
377 375
378 LOG(RegExpExecEvent(re, start_index, subject));
379 int value = Runtime::StringMatch(subject, needle, start_index); 376 int value = Runtime::StringMatch(subject, needle, start_index);
380 if (value == -1) return Factory::null_value(); 377 if (value == -1) return Factory::null_value();
381 378
382 Handle<FixedArray> array = Factory::NewFixedArray(2); 379 Handle<FixedArray> array = Factory::NewFixedArray(2);
383 array->set(0, Smi::FromInt(value)); 380 array->set(0, Smi::FromInt(value));
384 array->set(1, Smi::FromInt(value + needle->length())); 381 array->set(1, Smi::FromInt(value + needle->length()));
385 return Factory::NewJSArrayWithElements(array); 382 return Factory::NewJSArrayWithElements(array);
386 } 383 }
387 384
388 385
389 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, 386 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
390 Handle<String> subject) { 387 Handle<String> subject) {
391 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); 388 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
392 Handle<JSArray> result = Factory::NewJSArray(1); 389 Handle<JSArray> result = Factory::NewJSArray(1);
393 int index = 0; 390 int index = 0;
394 int match_count = 0; 391 int match_count = 0;
395 int subject_length = subject->length(); 392 int subject_length = subject->length();
396 int needle_length = needle->length(); 393 int needle_length = needle->length();
397 while (true) { 394 while (true) {
398 LOG(RegExpExecEvent(re, index, subject));
399 int value = -1; 395 int value = -1;
400 if (index + needle_length <= subject_length) { 396 if (index + needle_length <= subject_length) {
401 value = Runtime::StringMatch(subject, needle, index); 397 value = Runtime::StringMatch(subject, needle, index);
402 } 398 }
403 if (value == -1) break; 399 if (value == -1) break;
404 HandleScope scope; 400 HandleScope scope;
405 int end = value + needle_length; 401 int end = value + needle_length;
406 402
407 Handle<FixedArray> array = Factory::NewFixedArray(2); 403 Handle<FixedArray> array = Factory::NewFixedArray(2);
408 array->set(0, Smi::FromInt(value)); 404 array->set(0, Smi::FromInt(value));
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 JscreCompileWithRetryAfterGC(two_byte_pattern, 501 JscreCompileWithRetryAfterGC(two_byte_pattern,
506 flags, 502 flags,
507 &number_of_captures, 503 &number_of_captures,
508 &error_message, 504 &error_message,
509 &code); 505 &code);
510 506
511 if (code == NULL) { 507 if (code == NULL) {
512 // Throw an exception. 508 // Throw an exception.
513 Handle<JSArray> array = Factory::NewJSArray(2); 509 Handle<JSArray> array = Factory::NewJSArray(2);
514 SetElement(array, 0, pattern); 510 SetElement(array, 0, pattern);
515 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector( 511 const char* message =
516 (error_message == NULL) ? "Unknown regexp error" : error_message))); 512 (error_message == NULL) ? "Unknown regexp error" : error_message;
513 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
517 Handle<Object> regexp_err = 514 Handle<Object> regexp_err =
518 Factory::NewSyntaxError("malformed_regexp", array); 515 Factory::NewSyntaxError("malformed_regexp", array);
519 Top::Throw(*regexp_err); 516 Top::Throw(*regexp_err);
520 return Handle<Object>(); 517 return Handle<Object>();
521 } 518 }
522 519
523 // Convert the return address to a ByteArray pointer. 520 // Convert the return address to a ByteArray pointer.
524 Handle<ByteArray> internal( 521 Handle<ByteArray> internal(
525 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); 522 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
526 523
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
569 int* offsets_vector, 566 int* offsets_vector,
570 int offsets_vector_length) { 567 int offsets_vector_length) {
571 int rc; 568 int rc;
572 { 569 {
573 AssertNoAllocation a; 570 AssertNoAllocation a;
574 ByteArray* internal = JscreInternal(regexp); 571 ByteArray* internal = JscreInternal(regexp);
575 const v8::jscre::JscreRegExp* js_regexp = 572 const v8::jscre::JscreRegExp* js_regexp =
576 reinterpret_cast<v8::jscre::JscreRegExp*>( 573 reinterpret_cast<v8::jscre::JscreRegExp*>(
577 internal->GetDataStartAddress()); 574 internal->GetDataStartAddress());
578 575
579 LOG(RegExpExecEvent(regexp, previous_index, subject));
580
581 rc = v8::jscre::jsRegExpExecute(js_regexp, 576 rc = v8::jscre::jsRegExpExecute(js_regexp,
582 two_byte_subject, 577 two_byte_subject,
583 subject->length(), 578 subject->length(),
584 previous_index, 579 previous_index,
585 offsets_vector, 580 offsets_vector,
586 offsets_vector_length); 581 offsets_vector_length);
587 } 582 }
588 583
589 // The KJS JavaScript engine returns null (ie, a failed match) when 584 // The KJS JavaScript engine returns null (ie, a failed match) when
590 // JSRE's internal match limit is exceeded. We duplicate that behavior here. 585 // JSRE's internal match limit is exceeded. We duplicate that behavior here.
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
785 780
786 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); 781 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
787 782
788 #ifdef DEBUG 783 #ifdef DEBUG
789 if (FLAG_trace_regexp_bytecodes) { 784 if (FLAG_trace_regexp_bytecodes) {
790 String* pattern = regexp->Pattern(); 785 String* pattern = regexp->Pattern();
791 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 786 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
792 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 787 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
793 } 788 }
794 #endif 789 #endif
795 LOG(RegExpExecEvent(regexp, previous_index, subject));
796 790
797 if (!subject->IsFlat(StringShape(*subject))) { 791 if (!subject->IsFlat(StringShape(*subject))) {
798 FlattenString(subject); 792 FlattenString(subject);
799 } 793 }
800 794
801 return IrregexpExecOnce(irregexp, 795 return IrregexpExecOnce(irregexp,
802 num_captures, 796 num_captures,
803 subject, 797 subject,
804 previous_index, 798 previous_index,
805 offsets.vector(), 799 offsets.vector(),
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
839 matches = Factory::null_value(); 833 matches = Factory::null_value();
840 return result; 834 return result;
841 } else { 835 } else {
842 #ifdef DEBUG 836 #ifdef DEBUG
843 if (FLAG_trace_regexp_bytecodes) { 837 if (FLAG_trace_regexp_bytecodes) {
844 String* pattern = regexp->Pattern(); 838 String* pattern = regexp->Pattern();
845 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 839 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
846 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 840 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
847 } 841 }
848 #endif 842 #endif
849 LOG(RegExpExecEvent(regexp, previous_index, subject));
850 matches = IrregexpExecOnce(irregexp, 843 matches = IrregexpExecOnce(irregexp,
851 IrregexpNumberOfCaptures(irregexp), 844 IrregexpNumberOfCaptures(irregexp),
852 subject, 845 subject,
853 previous_index, 846 previous_index,
854 offsets.vector(), 847 offsets.vector(),
855 offsets.length()); 848 offsets.length());
856 849
857 if (matches->IsJSArray()) { 850 if (matches->IsJSArray()) {
858 SetElement(result, i, matches); 851 SetElement(result, i, matches);
859 i++; 852 i++;
(...skipping 556 matching lines...) Expand 10 before | Expand all | Expand 10 after
1416 // This is called as we come into a loop choice node and some other tricky 1409 // This is called as we come into a loop choice node and some other tricky
1417 // nodes. It normalises the state of the code generator to ensure we can 1410 // nodes. It normalises the state of the code generator to ensure we can
1418 // generate generic code. 1411 // generate generic code.
1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 1412 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1420 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1413 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1421 1414
1422 ASSERT(actions_ != NULL || 1415 ASSERT(actions_ != NULL ||
1423 cp_offset_ != 0 || 1416 cp_offset_ != 0 ||
1424 backtrack() != NULL || 1417 backtrack() != NULL ||
1425 characters_preloaded_ != 0 || 1418 characters_preloaded_ != 0 ||
1426 quick_check_performed_.characters() != 0); 1419 quick_check_performed_.characters() != 0 ||
1420 bound_checked_up_to_ != 0);
1427 1421
1428 if (actions_ == NULL && backtrack() == NULL) { 1422 if (actions_ == NULL && backtrack() == NULL) {
1429 // Here we just have some deferred cp advances to fix and we are back to 1423 // Here we just have some deferred cp advances to fix and we are back to
1430 // a normal situation. We may also have to forget some information gained 1424 // a normal situation. We may also have to forget some information gained
1431 // through a quick check that was already performed. 1425 // through a quick check that was already performed.
1432 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); 1426 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1433 // Create a new trivial state and generate the node with that. 1427 // Create a new trivial state and generate the node with that.
1434 GenerationVariant new_state; 1428 GenerationVariant new_state;
1435 return successor->Emit(compiler, &new_state); 1429 return successor->Emit(compiler, &new_state);
1436 } 1430 }
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
1640 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1634 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1641 checked = check; 1635 checked = check;
1642 } 1636 }
1643 macro_assembler->CheckNotCharacter(c, on_failure); 1637 macro_assembler->CheckNotCharacter(c, on_failure);
1644 } 1638 }
1645 return checked; 1639 return checked;
1646 } 1640 }
1647 1641
1648 1642
1649 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 1643 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1644 bool ascii,
1650 uc16 c1, 1645 uc16 c1,
1651 uc16 c2, 1646 uc16 c2,
1652 Label* on_failure) { 1647 Label* on_failure) {
1648 uc16 char_mask;
1649 if (ascii) {
1650 char_mask = String::kMaxAsciiCharCode;
1651 } else {
1652 char_mask = String::kMaxUC16CharCode;
1653 }
1653 uc16 exor = c1 ^ c2; 1654 uc16 exor = c1 ^ c2;
1654 // Check whether exor has only one bit set. 1655 // Check whether exor has only one bit set.
1655 if (((exor - 1) & exor) == 0) { 1656 if (((exor - 1) & exor) == 0) {
1656 // If c1 and c2 differ only by one bit. 1657 // If c1 and c2 differ only by one bit.
1657 // Ecma262UnCanonicalize always gives the highest number last. 1658 // Ecma262UnCanonicalize always gives the highest number last.
1658 ASSERT(c2 > c1); 1659 ASSERT(c2 > c1);
1659 uc16 mask = String::kMaxUC16CharCode ^ exor; 1660 uc16 mask = char_mask ^ exor;
1660 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); 1661 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
1661 return true; 1662 return true;
1662 } 1663 }
1663 ASSERT(c2 > c1); 1664 ASSERT(c2 > c1);
1664 uc16 diff = c2 - c1; 1665 uc16 diff = c2 - c1;
1665 if (((diff - 1) & diff) == 0 && c1 >= diff) { 1666 if (((diff - 1) & diff) == 0 && c1 >= diff) {
1666 // If the characters differ by 2^n but don't differ by one bit then 1667 // If the characters differ by 2^n but don't differ by one bit then
1667 // subtract the difference from the found character, then do the or 1668 // subtract the difference from the found character, then do the or
1668 // trick. We avoid the theoretical case where negative numbers are 1669 // trick. We avoid the theoretical case where negative numbers are
1669 // involved in order to simplify code generation. 1670 // involved in order to simplify code generation.
1670 uc16 mask = String::kMaxUC16CharCode ^ diff; 1671 uc16 mask = char_mask ^ diff;
1671 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 1672 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1672 diff, 1673 diff,
1673 mask, 1674 mask,
1674 on_failure); 1675 on_failure);
1675 return true; 1676 return true;
1676 } 1677 }
1677 return false; 1678 return false;
1678 } 1679 }
1679 1680
1680 1681
1681 // Only emits letters (things that have case). Only used for case independent 1682 // Only emits letters (things that have case). Only used for case independent
1682 // matches. 1683 // matches.
1683 static inline bool EmitAtomLetter( 1684 static inline bool EmitAtomLetter(
1684 RegExpMacroAssembler* macro_assembler, 1685 RegExpMacroAssembler* macro_assembler,
1686 bool ascii,
1685 uc16 c, 1687 uc16 c,
1686 Label* on_failure, 1688 Label* on_failure,
1687 int cp_offset, 1689 int cp_offset,
1688 bool check, 1690 bool check,
1689 bool preloaded) { 1691 bool preloaded) {
1690 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1692 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1691 int length = uncanonicalize.get(c, '\0', chars); 1693 int length = uncanonicalize.get(c, '\0', chars);
1692 if (length <= 1) return false; 1694 if (length <= 1) return false;
1693 // We may not need to check against the end of the input string 1695 // We may not need to check against the end of the input string
1694 // if this character lies before a character that matched. 1696 // if this character lies before a character that matched.
1695 if (!preloaded) { 1697 if (!preloaded) {
1696 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1698 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1697 } 1699 }
1698 Label ok; 1700 Label ok;
1699 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1701 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1700 switch (length) { 1702 switch (length) {
1701 case 2: { 1703 case 2: {
1702 if (ShortCutEmitCharacterPair(macro_assembler, 1704 if (ShortCutEmitCharacterPair(macro_assembler,
1705 ascii,
1703 chars[0], 1706 chars[0],
1704 chars[1], 1707 chars[1],
1705 on_failure)) { 1708 on_failure)) {
1706 } else { 1709 } else {
1707 macro_assembler->CheckCharacter(chars[0], &ok); 1710 macro_assembler->CheckCharacter(chars[0], &ok);
1708 macro_assembler->CheckNotCharacter(chars[1], on_failure); 1711 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1709 macro_assembler->Bind(&ok); 1712 macro_assembler->Bind(&ok);
1710 } 1713 }
1711 break; 1714 break;
1712 } 1715 }
(...skipping 14 matching lines...) Expand all
1727 } 1730 }
1728 1731
1729 1732
1730 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1733 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1731 RegExpCharacterClass* cc, 1734 RegExpCharacterClass* cc,
1732 int cp_offset, 1735 int cp_offset,
1733 Label* on_failure, 1736 Label* on_failure,
1734 bool check_offset, 1737 bool check_offset,
1735 bool ascii, 1738 bool ascii,
1736 bool preloaded) { 1739 bool preloaded) {
1740 if (cc->is_standard() &&
1741 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
1742 cp_offset,
1743 check_offset,
1744 on_failure)) {
1745 return;
1746 }
1747
1737 ZoneList<CharacterRange>* ranges = cc->ranges(); 1748 ZoneList<CharacterRange>* ranges = cc->ranges();
1738 int max_char; 1749 int max_char;
1739 if (ascii) { 1750 if (ascii) {
1740 max_char = String::kMaxAsciiCharCode; 1751 max_char = String::kMaxAsciiCharCode;
1741 } else { 1752 } else {
1742 max_char = String::kMaxUC16CharCode; 1753 max_char = String::kMaxUC16CharCode;
1743 } 1754 }
1744 1755
1745 Label success; 1756 Label success;
1746 1757
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after
2000 if (details->characters() == 1) { 2011 if (details->characters() == 1) {
2001 // If number of characters preloaded is 1 then we used a byte or 16 bit 2012 // If number of characters preloaded is 1 then we used a byte or 16 bit
2002 // load so the value is already masked down. 2013 // load so the value is already masked down.
2003 uint32_t char_mask; 2014 uint32_t char_mask;
2004 if (compiler->ascii()) { 2015 if (compiler->ascii()) {
2005 char_mask = String::kMaxAsciiCharCode; 2016 char_mask = String::kMaxAsciiCharCode;
2006 } else { 2017 } else {
2007 char_mask = String::kMaxUC16CharCode; 2018 char_mask = String::kMaxUC16CharCode;
2008 } 2019 }
2009 if ((mask & char_mask) == char_mask) need_mask = false; 2020 if ((mask & char_mask) == char_mask) need_mask = false;
2021 mask &= char_mask;
2010 } else { 2022 } else {
2011 // For 2-character preloads in ASCII mode we also use a 16 bit load with 2023 // For 2-character preloads in ASCII mode we also use a 16 bit load with
2012 // zero extend. 2024 // zero extend.
2013 if (details->characters() == 2 && compiler->ascii()) { 2025 if (details->characters() == 2 && compiler->ascii()) {
2014 if ((mask & 0xffff) == 0xffff) need_mask = false; 2026 if ((mask & 0xffff) == 0xffff) need_mask = false;
2015 } else { 2027 } else {
2016 if (mask == 0xffffffff) need_mask = false; 2028 if (mask == 0xffffffff) need_mask = false;
2017 } 2029 }
2018 } 2030 }
2019 2031
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after
2316 assembler->LoadCurrentCharacter(cp_offset + j, 2328 assembler->LoadCurrentCharacter(cp_offset + j,
2317 backtrack, 2329 backtrack,
2318 *checked_up_to < cp_offset + j); 2330 *checked_up_to < cp_offset + j);
2319 } 2331 }
2320 assembler->CheckNotCharacter(quarks[j], backtrack); 2332 assembler->CheckNotCharacter(quarks[j], backtrack);
2321 } 2333 }
2322 } else { 2334 } else {
2323 ASSERT_EQ(pass, CASE_CHARACTER_MATCH); 2335 ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
2324 ASSERT(compiler->ignore_case()); 2336 ASSERT(compiler->ignore_case());
2325 bound_checked = EmitAtomLetter(assembler, 2337 bound_checked = EmitAtomLetter(assembler,
2338 compiler->ascii(),
2326 quarks[j], 2339 quarks[j],
2327 backtrack, 2340 backtrack,
2328 cp_offset + j, 2341 cp_offset + j,
2329 *checked_up_to < cp_offset + j, 2342 *checked_up_to < cp_offset + j,
2330 preloaded); 2343 preloaded);
2331 } 2344 }
2332 if (pass != NON_ASCII_MATCH && bound_checked) { 2345 if (pass != NON_ASCII_MATCH && bound_checked) {
2333 if (cp_offset + j > *checked_up_to) { 2346 if (cp_offset + j > *checked_up_to) {
2334 *checked_up_to = cp_offset + j; 2347 *checked_up_to = cp_offset + j;
2335 } 2348 }
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
2396 return true; 2409 return true;
2397 } 2410 }
2398 2411
2399 if (compiler->ascii()) { 2412 if (compiler->ascii()) {
2400 int dummy = 0; 2413 int dummy = 0;
2401 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy); 2414 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy);
2402 } 2415 }
2403 2416
2404 bool first_elt_done = false; 2417 bool first_elt_done = false;
2405 int bound_checked_to = variant->cp_offset() - 1; 2418 int bound_checked_to = variant->cp_offset() - 1;
2406 QuickCheckDetails* quick_check = variant->quick_check_performed(); 2419 bound_checked_to += variant->bound_checked_up_to();
2407 bound_checked_to += Max(quick_check->characters(),
2408 variant->characters_preloaded());
2409 2420
2410 // If a character is preloaded into the current character register then 2421 // If a character is preloaded into the current character register then
2411 // check that now. 2422 // check that now.
2412 if (variant->characters_preloaded() == 1) { 2423 if (variant->characters_preloaded() == 1) {
2413 TextEmitPass(compiler, 2424 TextEmitPass(compiler,
2414 CHARACTER_MATCH, 2425 CHARACTER_MATCH,
2415 true, 2426 true,
2416 variant, 2427 variant,
2417 false, 2428 false,
2418 &bound_checked_to); 2429 &bound_checked_to);
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
2465 ASSERT(by > 0); 2476 ASSERT(by > 0);
2466 // We don't have an instruction for shifting the current character register 2477 // We don't have an instruction for shifting the current character register
2467 // down or for using a shifted value for anything so lets just forget that 2478 // down or for using a shifted value for anything so lets just forget that
2468 // we preloaded any characters into it. 2479 // we preloaded any characters into it.
2469 characters_preloaded_ = 0; 2480 characters_preloaded_ = 0;
2470 // Adjust the offsets of the quick check performed information. This 2481 // Adjust the offsets of the quick check performed information. This
2471 // information is used to find out what we already determined about the 2482 // information is used to find out what we already determined about the
2472 // characters by means of mask and compare. 2483 // characters by means of mask and compare.
2473 quick_check_performed_.Advance(by, ascii); 2484 quick_check_performed_.Advance(by, ascii);
2474 cp_offset_ += by; 2485 cp_offset_ += by;
2486 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
2475 } 2487 }
2476 2488
2477 2489
2478 void TextNode::MakeCaseIndependent() { 2490 void TextNode::MakeCaseIndependent() {
2479 int element_count = elms_->length(); 2491 int element_count = elms_->length();
2480 for (int i = 0; i < element_count; i++) { 2492 for (int i = 0; i < element_count; i++) {
2481 TextElement elm = elms_->at(i); 2493 TextElement elm = elms_->at(i);
2482 if (elm.type == TextElement::CHAR_CLASS) { 2494 if (elm.type == TextElement::CHAR_CLASS) {
2483 RegExpCharacterClass* cc = elm.data.u_char_class; 2495 RegExpCharacterClass* cc = elm.data.u_char_class;
2484 ZoneList<CharacterRange>* ranges = cc->ranges(); 2496 ZoneList<CharacterRange>* ranges = cc->ranges();
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after
2772 return false; 2784 return false;
2773 } 2785 }
2774 } 2786 }
2775 2787
2776 Label second_choice; // For use in greedy matches. 2788 Label second_choice; // For use in greedy matches.
2777 macro_assembler->Bind(&second_choice); 2789 macro_assembler->Bind(&second_choice);
2778 2790
2779 int first_normal_choice = greedy_loop ? 1 : 0; 2791 int first_normal_choice = greedy_loop ? 1 : 0;
2780 2792
2781 int preload_characters = CalculatePreloadCharacters(compiler); 2793 int preload_characters = CalculatePreloadCharacters(compiler);
2782 bool preload_is_current = false; 2794 bool preload_is_current =
2783 bool preload_has_checked_bounds = false; 2795 (current_variant->characters_preloaded() == preload_characters);
2796 bool preload_has_checked_bounds = preload_is_current;
2784 2797
2785 AlternativeGenerationList alt_gens(choice_count); 2798 AlternativeGenerationList alt_gens(choice_count);
2786 2799
2787 // For now we just call all choices one after the other. The idea ultimately 2800 // For now we just call all choices one after the other. The idea ultimately
2788 // is to use the Dispatch table to try only the relevant ones. 2801 // is to use the Dispatch table to try only the relevant ones.
2789 for (int i = first_normal_choice; i < choice_count; i++) { 2802 for (int i = first_normal_choice; i < choice_count; i++) {
2790 GuardedAlternative alternative = alternatives_->at(i); 2803 GuardedAlternative alternative = alternatives_->at(i);
2791 AlternativeGeneration* alt_gen(alt_gens.at(i)); 2804 AlternativeGeneration* alt_gen(alt_gens.at(i));
2792 alt_gen->quick_check_details.set_characters(preload_characters); 2805 alt_gen->quick_check_details.set_characters(preload_characters);
2793 ZoneList<Guard*>* guards = alternative.guards(); 2806 ZoneList<Guard*>* guards = alternative.guards();
2794 int guard_count = (guards == NULL) ? 0 : guards->length(); 2807 int guard_count = (guards == NULL) ? 0 : guards->length();
2795
2796 GenerationVariant new_variant(*current_variant); 2808 GenerationVariant new_variant(*current_variant);
2797 new_variant.set_characters_preloaded(preload_is_current ? 2809 new_variant.set_characters_preloaded(preload_is_current ?
2798 preload_characters : 2810 preload_characters :
2799 0); 2811 0);
2812 if (preload_has_checked_bounds) {
2813 new_variant.set_bound_checked_up_to(preload_characters);
2814 }
2800 new_variant.quick_check_performed()->Clear(); 2815 new_variant.quick_check_performed()->Clear();
2801 alt_gen->expects_preload = preload_is_current; 2816 alt_gen->expects_preload = preload_is_current;
2802 bool generate_full_check_inline = false; 2817 bool generate_full_check_inline = false;
2803 if (alternative.node()->EmitQuickCheck(compiler, 2818 if (alternative.node()->EmitQuickCheck(compiler,
2804 &new_variant, 2819 &new_variant,
2805 preload_has_checked_bounds, 2820 preload_has_checked_bounds,
2806 &alt_gen->possible_success, 2821 &alt_gen->possible_success,
2807 &alt_gen->quick_check_details, 2822 &alt_gen->quick_check_details,
2808 i < choice_count - 1)) { 2823 i < choice_count - 1)) {
2809 // Quick check was generated for this choice. 2824 // Quick check was generated for this choice.
2810 preload_is_current = true; 2825 preload_is_current = true;
2811 preload_has_checked_bounds = true; 2826 preload_has_checked_bounds = true;
2812 // On the last choice in the ChoiceNode we generated the quick 2827 // On the last choice in the ChoiceNode we generated the quick
2813 // check to fall through on possible success. So now we need to 2828 // check to fall through on possible success. So now we need to
2814 // generate the full check inline. 2829 // generate the full check inline.
2815 if (i == choice_count - 1) { 2830 if (i == choice_count - 1) {
2816 macro_assembler->Bind(&alt_gen->possible_success); 2831 macro_assembler->Bind(&alt_gen->possible_success);
2817 new_variant.set_quick_check_performed(&alt_gen->quick_check_details); 2832 new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
2818 new_variant.set_characters_preloaded(preload_characters); 2833 new_variant.set_characters_preloaded(preload_characters);
2834 new_variant.set_bound_checked_up_to(preload_characters);
2819 generate_full_check_inline = true; 2835 generate_full_check_inline = true;
2820 } 2836 }
2821 } else { 2837 } else {
2822 // No quick check was generated. Put the full code here. 2838 // No quick check was generated. Put the full code here.
2839 // If this is not the first choice then there could be slow checks from
2840 // previous cases that go here when they fail. There's no reason to
2841 // insist that they preload characters since the slow check we are about
2842 // to generate probably can't use it.
2843 if (i != first_normal_choice) {
2844 alt_gen->expects_preload = false;
2845 new_variant.set_characters_preloaded(0);
2846 }
2823 if (i < choice_count - 1) { 2847 if (i < choice_count - 1) {
2824 new_variant.set_backtrack(&alt_gen->after); 2848 new_variant.set_backtrack(&alt_gen->after);
2825 } 2849 }
2826 generate_full_check_inline = true; 2850 generate_full_check_inline = true;
2827 } 2851 }
2828 if (generate_full_check_inline) { 2852 if (generate_full_check_inline) {
2829 if (preload_is_current) {
2830 new_variant.set_characters_preloaded(preload_characters);
2831 }
2832 for (int j = 0; j < guard_count; j++) { 2853 for (int j = 0; j < guard_count; j++) {
2833 GenerateGuard(macro_assembler, guards->at(j), &new_variant); 2854 GenerateGuard(macro_assembler, guards->at(j), &new_variant);
2834 } 2855 }
2835 if (!alternative.node()->Emit(compiler, &new_variant)) { 2856 if (!alternative.node()->Emit(compiler, &new_variant)) {
2836 greedy_loop_label.Unuse(); 2857 greedy_loop_label.Unuse();
2837 return false; 2858 return false;
2838 } 2859 }
2839 preload_is_current = false; 2860 preload_is_current = false;
2840 } 2861 }
2841 macro_assembler->Bind(&alt_gen->after); 2862 macro_assembler->Bind(&alt_gen->after);
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after
3318 printer.PrintNode(label, node); 3339 printer.PrintNode(label, node);
3319 } 3340 }
3320 3341
3321 3342
3322 #endif // DEBUG 3343 #endif // DEBUG
3323 3344
3324 3345
3325 // ------------------------------------------------------------------- 3346 // -------------------------------------------------------------------
3326 // Tree to graph conversion 3347 // Tree to graph conversion
3327 3348
3349 static const int kSpaceRangeCount = 20;
3350 static const int kSpaceRangeAsciiCount = 4;
3351 static const uc16 kSpaceRanges[kSpaceRangeCount] = { 0x0009, 0x000D, 0x0020,
3352 0x0020, 0x00A0, 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A,
3353 0x2028, 0x2029, 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 };
3354
3355 static const int kWordRangeCount = 8;
3356 static const uc16 kWordRanges[kWordRangeCount] = { '0', '9', 'A', 'Z', '_',
3357 '_', 'a', 'z' };
3358
3359 static const int kDigitRangeCount = 2;
3360 static const uc16 kDigitRanges[kDigitRangeCount] = { '0', '9' };
3361
3362 static const int kLineTerminatorRangeCount = 6;
3363 static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = { 0x000A,
3364 0x000A, 0x000D, 0x000D, 0x2028, 0x2029 };
3328 3365
3329 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 3366 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
3330 RegExpNode* on_success) { 3367 RegExpNode* on_success) {
3331 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); 3368 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
3332 elms->Add(TextElement::Atom(this)); 3369 elms->Add(TextElement::Atom(this));
3333 return new TextNode(elms, on_success); 3370 return new TextNode(elms, on_success);
3334 } 3371 }
3335 3372
3336 3373
3337 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 3374 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
3338 RegExpNode* on_success) { 3375 RegExpNode* on_success) {
3339 return new TextNode(elements(), on_success); 3376 return new TextNode(elements(), on_success);
3340 } 3377 }
3341 3378
3379 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
3380 const uc16* special_class,
3381 int length) {
3382 ASSERT(ranges->length() != 0);
3383 ASSERT(length != 0);
3384 ASSERT(special_class[0] != 0);
3385 if (ranges->length() != (length >> 1) + 1) {
3386 return false;
3387 }
3388 CharacterRange range = ranges->at(0);
3389 if (range.from() != 0) {
3390 return false;
3391 }
3392 for (int i = 0; i < length; i += 2) {
3393 if (special_class[i] != (range.to() + 1)) {
3394 return false;
3395 }
3396 range = ranges->at((i >> 1) + 1);
3397 if (special_class[i+1] != range.from() - 1) {
3398 return false;
3399 }
3400 }
3401 if (range.to() != 0xffff) {
3402 return false;
3403 }
3404 return true;
3405 }
3406
3407
3408 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
3409 const uc16* special_class,
3410 int length) {
3411 if (ranges->length() * 2 != length) {
3412 return false;
3413 }
3414 for (int i = 0; i < length; i += 2) {
3415 CharacterRange range = ranges->at(i >> 1);
3416 if (range.from() != special_class[i] || range.to() != special_class[i+1]) {
3417 return false;
3418 }
3419 }
3420 return true;
3421 }
3422
3423
3424 bool RegExpCharacterClass::is_standard() {
3425 // TODO(lrn): Remove need for this function, by not throwing away information
3426 // along the way.
3427 if (is_negated_) {
3428 return false;
3429 }
3430 if (set_.is_standard()) {
3431 return true;
3432 }
3433 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
3434 set_.set_standard_set_type('s');
3435 return true;
3436 }
3437 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
3438 set_.set_standard_set_type('S');
3439 return true;
3440 }
3441 if (CompareInverseRanges(set_.ranges(),
3442 kLineTerminatorRanges,
3443 kLineTerminatorRangeCount)) {
3444 set_.set_standard_set_type('.');
3445 return true;
3446 }
3447 return false;
3448 }
3449
3342 3450
3343 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 3451 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
3344 RegExpNode* on_success) { 3452 RegExpNode* on_success) {
3345 return new TextNode(this, on_success); 3453 return new TextNode(this, on_success);
3346 } 3454 }
3347 3455
3348 3456
3349 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 3457 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
3350 RegExpNode* on_success) { 3458 RegExpNode* on_success) {
3351 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 3459 ZoneList<RegExpTree*>* alternatives = this->alternatives();
(...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after
3573 RegExpNode* on_success) { 3681 RegExpNode* on_success) {
3574 ZoneList<RegExpTree*>* children = nodes(); 3682 ZoneList<RegExpTree*>* children = nodes();
3575 RegExpNode* current = on_success; 3683 RegExpNode* current = on_success;
3576 for (int i = children->length() - 1; i >= 0; i--) { 3684 for (int i = children->length() - 1; i >= 0; i--) {
3577 current = children->at(i)->ToNode(compiler, current); 3685 current = children->at(i)->ToNode(compiler, current);
3578 } 3686 }
3579 return current; 3687 return current;
3580 } 3688 }
3581 3689
3582 3690
3583 static const int kSpaceRangeCount = 20;
3584 static const uc16 kSpaceRanges[kSpaceRangeCount] = {
3585 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
3586 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
3587 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
3588 };
3589
3590
3591 static const int kWordRangeCount = 8;
3592 static const uc16 kWordRanges[kWordRangeCount] = {
3593 '0', '9', 'A', 'Z', '_', '_', 'a', 'z'
3594 };
3595
3596
3597 static const int kDigitRangeCount = 2;
3598 static const uc16 kDigitRanges[kDigitRangeCount] = {
3599 '0', '9'
3600 };
3601
3602
3603 static const int kLineTerminatorRangeCount = 6;
3604 static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {
3605 0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029
3606 };
3607
3608
3609 static void AddClass(const uc16* elmv, 3691 static void AddClass(const uc16* elmv,
3610 int elmc, 3692 int elmc,
3611 ZoneList<CharacterRange>* ranges) { 3693 ZoneList<CharacterRange>* ranges) {
3612 for (int i = 0; i < elmc; i += 2) { 3694 for (int i = 0; i < elmc; i += 2) {
3613 ASSERT(elmv[i] <= elmv[i + 1]); 3695 ASSERT(elmv[i] <= elmv[i + 1]);
3614 ranges->Add(CharacterRange(elmv[i], elmv[i + 1])); 3696 ranges->Add(CharacterRange(elmv[i], elmv[i + 1]));
3615 } 3697 }
3616 } 3698 }
3617 3699
3618 3700
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after
3794 } 3876 }
3795 } 3877 }
3796 start = pos = block_end + 1; 3878 start = pos = block_end + 1;
3797 } 3879 }
3798 } else { 3880 } else {
3799 // TODO(plesner) when we've fixed the 2^11 bug in unibrow. 3881 // TODO(plesner) when we've fixed the 2^11 bug in unibrow.
3800 } 3882 }
3801 } 3883 }
3802 3884
3803 3885
3886 ZoneList<CharacterRange>* CharacterSet::ranges() {
3887 if (ranges_ == NULL) {
3888 ranges_ = new ZoneList<CharacterRange>(2);
3889 CharacterRange::AddClassEscape(standard_set_type_, ranges_);
3890 }
3891 return ranges_;
3892 }
3893
3894
3895
3804 // ------------------------------------------------------------------- 3896 // -------------------------------------------------------------------
3805 // Interest propagation 3897 // Interest propagation
3806 3898
3807 3899
3808 RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) { 3900 RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) {
3809 for (int i = 0; i < siblings_.length(); i++) { 3901 for (int i = 0; i < siblings_.length(); i++) {
3810 RegExpNode* sibling = siblings_.Get(i); 3902 RegExpNode* sibling = siblings_.Get(i);
3811 if (sibling->info()->Matches(info)) 3903 if (sibling->info()->Matches(info))
3812 return sibling; 3904 return sibling;
3813 } 3905 }
(...skipping 481 matching lines...) Expand 10 before | Expand all | Expand 10 after
4295 EmbeddedVector<byte, 1024> codes; 4387 EmbeddedVector<byte, 1024> codes;
4296 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4388 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4297 return compiler.Assemble(&macro_assembler, 4389 return compiler.Assemble(&macro_assembler,
4298 node, 4390 node,
4299 data->capture_count, 4391 data->capture_count,
4300 pattern); 4392 pattern);
4301 } 4393 }
4302 4394
4303 4395
4304 }} // namespace v8::internal 4396 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/log.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698