src/jsregexp.cc - Issue 20499: Experimental: merge from bleeding edge 1298:1307. Port the eval...

Side by Side Diff: src/jsregexp.cc

Issue 20499: Experimental: merge from bleeding edge 1298:1307. Port the eval... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/

Patch Set: '' Created 11 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 1721 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1732 trace->backtrack());	1732 trace->backtrack());

1733 break;	1733 break;

1734 }	1734 }

1735 }	1735 }

1736	1736

1737	1737

1738 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;	1738 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;

1739 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;	1739 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;

1740	1740

1741	1741

	1742 // Returns the number of characters in the equivalence class, omitting those

	1743 // that cannot occur in the source string because it is ASCII.

	1744 static int GetCaseIndependentLetters(uc16 character,

	1745 bool ascii_subject,

	1746 unibrow::uchar* letters) {

	1747 int length = uncanonicalize.get(character, '\0', letters);

	1748 // Unibrow returns 0 or 1 for characters where case independependence is

	1749 // trivial.

	1750 if (length == 0) {

	1751 letters[0] = character;

	1752 length = 1;

	1753 }

	1754 if (!ascii_subject \|\| character <= String::kMaxAsciiCharCode) {

	1755 return length;

	1756 }

	1757 // The standard requires that non-ASCII characters cannot have ASCII

	1758 // character codes in their equivalence class.

	1759 return 0;

	1760 }

	1761

	1762

	1763 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler,

	1764 uc16 c,

	1765 Label* on_failure,

	1766 int cp_offset,

	1767 bool check,

	1768 bool preloaded) {

	1769 RegExpMacroAssembler* assembler = compiler->macro_assembler();

	1770 bool bound_checked = false;

	1771 if (!preloaded) {

	1772 assembler->LoadCurrentCharacter(

	1773 cp_offset,

	1774 on_failure,

	1775 check);

	1776 bound_checked = true;

	1777 }

	1778 assembler->CheckNotCharacter(c, on_failure);

	1779 return bound_checked;

	1780 }

	1781

	1782

1742 // Only emits non-letters (things that don't have case). Only used for case	1783 // Only emits non-letters (things that don't have case). Only used for case

1743 // independent matches.	1784 // independent matches.

1744 static inline bool EmitAtomNonLetter(	1785 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler,

1745 RegExpMacroAssembler* macro_assembler,	1786 uc16 c,

1746 uc16 c,	1787 Label* on_failure,

1747 Label* on_failure,	1788 int cp_offset,

1748 int cp_offset,	1789 bool check,

1749 bool check,	1790 bool preloaded) {

1750 bool preloaded) {	1791 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

	1792 bool ascii = compiler->ascii();

1751 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1793 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1752 int length = uncanonicalize.get(c, '\0', chars);	1794 int length = GetCaseIndependentLetters(c, ascii, chars);

	1795 if (length < 1) {

	1796 // This can't match. Must be an ASCII subject and a non-ASCII character.

	1797 // We do not need to do anything since the ASCII pass already handled this.

	1798 return false; // Bounds not checked.

	1799 }

1753 bool checked = false;	1800 bool checked = false;

1754 if (length <= 1) {	1801 // We handle the length > 1 case in a later pass.

	1802 if (length == 1) {

	1803 if (ascii && c > String::kMaxAsciiCharCodeU) {

	1804 // Can't match - see above.

	1805 return false; // Bounds not checked.

	1806 }

1755 if (!preloaded) {	1807 if (!preloaded) {

1756 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);	1808 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1757 checked = check;	1809 checked = check;

1758 }	1810 }

1759 macro_assembler->CheckNotCharacter(c, on_failure);	1811 macro_assembler->CheckNotCharacter(c, on_failure);

1760 }	1812 }

1761 return checked;	1813 return checked;

1762 }	1814 }

1763	1815

1764	1816

(...skipping 29 matching lines...) Expand all Loading...
1794 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,	1846 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

1795 diff,	1847 diff,

1796 mask,	1848 mask,

1797 on_failure);	1849 on_failure);

1798 return true;	1850 return true;

1799 }	1851 }

1800 return false;	1852 return false;

1801 }	1853 }

1802	1854

1803	1855

	1856 typedef bool EmitCharacterFunction(RegExpCompiler* compiler,

	1857 uc16 c,

	1858 Label* on_failure,

	1859 int cp_offset,

	1860 bool check,

	1861 bool preloaded);

	1862

1804 // Only emits letters (things that have case). Only used for case independent	1863 // Only emits letters (things that have case). Only used for case independent

1805 // matches.	1864 // matches.

1806 static inline bool EmitAtomLetter(	1865 static inline bool EmitAtomLetter(RegExpCompiler* compiler,

1807 RegExpMacroAssembler* macro_assembler,	1866 uc16 c,

1808 bool ascii,	1867 Label* on_failure,

1809 uc16 c,	1868 int cp_offset,

1810 Label* on_failure,	1869 bool check,

1811 int cp_offset,	1870 bool preloaded) {

1812 bool check,	1871 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1813 bool preloaded) {	1872 bool ascii = compiler->ascii();

1814 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1873 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1815 int length = uncanonicalize.get(c, '\0', chars);	1874 int length = GetCaseIndependentLetters(c, ascii, chars);

1816 if (length <= 1) return false;	1875 if (length <= 1) return false;

1817 // We may not need to check against the end of the input string	1876 // We may not need to check against the end of the input string

1818 // if this character lies before a character that matched.	1877 // if this character lies before a character that matched.

1819 if (!preloaded) {	1878 if (!preloaded) {

1820 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);	1879 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1821 }	1880 }

1822 Label ok;	1881 Label ok;

1823 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);	1882 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

1824 switch (length) {	1883 switch (length) {

1825 case 2: {	1884 case 2: {

(...skipping 21 matching lines...) Expand all Loading...
1847 default:	1906 default:

1848 UNREACHABLE();	1907 UNREACHABLE();

1849 break;	1908 break;

1850 }	1909 }

1851 return true;	1910 return true;

1852 }	1911 }

1853	1912

1854	1913

1855 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,	1914 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

1856 RegExpCharacterClass* cc,	1915 RegExpCharacterClass* cc,

	1916 bool ascii,

	1917 Label* on_failure,

1857 int cp_offset,	1918 int cp_offset,

1858 Label* on_failure,

1859 bool check_offset,	1919 bool check_offset,

1860 bool ascii,

1861 bool preloaded) {	1920 bool preloaded) {

1862 if (cc->is_standard() &&	1921 if (cc->is_standard() &&

1863 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),	1922 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

1864 cp_offset,	1923 cp_offset,

1865 check_offset,	1924 check_offset,

1866 on_failure)) {	1925 on_failure)) {

1867 return;	1926 return;

1868 }	1927 }

1869	1928

1870 ZoneList<CharacterRange>* ranges = cc->ranges();	1929 ZoneList<CharacterRange>* ranges = cc->ranges();

(...skipping 360 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2231 for (int i = 0; i < characters && i < quarks.length(); i++) {	2290 for (int i = 0; i < characters && i < quarks.length(); i++) {

2232 QuickCheckDetails::Position* pos =	2291 QuickCheckDetails::Position* pos =

2233 details->positions(characters_filled_in);	2292 details->positions(characters_filled_in);

2234 uc16 c = quarks[i];	2293 uc16 c = quarks[i];

2235 if (c > char_mask) {	2294 if (c > char_mask) {

2236 // If we expect a non-ASCII character from an ASCII string,	2295 // If we expect a non-ASCII character from an ASCII string,

2237 // there is no way we can match. Not even case independent	2296 // there is no way we can match. Not even case independent

2238 // matching can turn an ASCII character into non-ASCII or	2297 // matching can turn an ASCII character into non-ASCII or

2239 // vice versa.	2298 // vice versa.

2240 details->set_cannot_match();	2299 details->set_cannot_match();

	2300 pos->determines_perfectly = false;

2241 return;	2301 return;

2242 }	2302 }

2243 if (compiler->ignore_case()) {	2303 if (compiler->ignore_case()) {

2244 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	2304 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

2245 int length = uncanonicalize.get(c, '\0', chars);	2305 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars);

2246 if (length < 2) {	2306 ASSERT(length != 0); // Can only happen if c > char_mask (see above).

	2307 if (length == 1) {

2247 // This letter has no case equivalents, so it's nice and simple	2308 // This letter has no case equivalents, so it's nice and simple

2248 // and the mask-compare will determine definitely whether we have	2309 // and the mask-compare will determine definitely whether we have

2249 // a match at this character position.	2310 // a match at this character position.

2250 pos->mask = char_mask;	2311 pos->mask = char_mask;

2251 pos->value = c;	2312 pos->value = c;

2252 pos->determines_perfectly = true;	2313 pos->determines_perfectly = true;

2253 } else {	2314 } else {

2254 uint32_t common_bits = char_mask;	2315 uint32_t common_bits = char_mask;

2255 uint32_t bits = chars[0];	2316 uint32_t bits = chars[0];

2256 for (int j = 1; j < length; j++) {	2317 for (int j = 1; j < length; j++) {

(...skipping 24 matching lines...) Expand all Loading...
2281 ASSERT(characters_filled_in <= details->characters());	2342 ASSERT(characters_filled_in <= details->characters());

2282 if (characters_filled_in == details->characters()) {	2343 if (characters_filled_in == details->characters()) {

2283 return;	2344 return;

2284 }	2345 }

2285 }	2346 }

2286 } else {	2347 } else {

2287 QuickCheckDetails::Position* pos =	2348 QuickCheckDetails::Position* pos =

2288 details->positions(characters_filled_in);	2349 details->positions(characters_filled_in);

2289 RegExpCharacterClass* tree = elm.data.u_char_class;	2350 RegExpCharacterClass* tree = elm.data.u_char_class;

2290 ZoneList<CharacterRange>* ranges = tree->ranges();	2351 ZoneList<CharacterRange>* ranges = tree->ranges();

2291 CharacterRange range = ranges->at(0);

2292 if (tree->is_negated()) {	2352 if (tree->is_negated()) {

2293 // A quick check uses multi-character mask and compare. There is no	2353 // A quick check uses multi-character mask and compare. There is no

2294 // useful way to incorporate a negative char class into this scheme	2354 // useful way to incorporate a negative char class into this scheme

2295 // so we just conservatively create a mask and value that will always	2355 // so we just conservatively create a mask and value that will always

2296 // succeed.	2356 // succeed.

2297 pos->mask = 0;	2357 pos->mask = 0;

2298 pos->value = 0;	2358 pos->value = 0;

2299 } else {	2359 } else {

2300 uint32_t differing_bits = (range.from() ^ range.to());	2360 int first_range = 0;

	2361 while (ranges->at(first_range).from() > char_mask) {

	2362 first_range++;

	2363 if (first_range == ranges->length()) {

	2364 details->set_cannot_match();

	2365 pos->determines_perfectly = false;

	2366 return;

	2367 }

	2368 }

	2369 CharacterRange range = ranges->at(first_range);

	2370 uc16 from = range.from();

	2371 uc16 to = range.to();

	2372 if (to > char_mask) {

	2373 to = char_mask;

	2374 }

	2375 uint32_t differing_bits = (from ^ to);

2301 // A mask and compare is only perfect if the differing bits form a	2376 // A mask and compare is only perfect if the differing bits form a

2302 // number like 00011111 with one single block of trailing 1s.	2377 // number like 00011111 with one single block of trailing 1s.

2303 if ((differing_bits & (differing_bits + 1)) == 0) {	2378 if ((differing_bits & (differing_bits + 1)) == 0) {

2304 pos->determines_perfectly = true;	2379 pos->determines_perfectly = true;

2305 }	2380 }

2306 uint32_t common_bits = ~SmearBitsRight(differing_bits);	2381 uint32_t common_bits = ~SmearBitsRight(differing_bits);

2307 uint32_t bits = (range.from() & common_bits);	2382 uint32_t bits = (from & common_bits);

2308 for (int i = 1; i < ranges->length(); i++) {	2383 for (int i = first_range + 1; i < ranges->length(); i++) {

	2384 CharacterRange range = ranges->at(i);

	2385 uc16 from = range.from();

	2386 uc16 to = range.to();

	2387 if (from > char_mask) continue;

	2388 if (to > char_mask) to = char_mask;

2309 // Here we are combining more ranges into the mask and compare	2389 // Here we are combining more ranges into the mask and compare

2310 // value. With each new range the mask becomes more sparse and	2390 // value. With each new range the mask becomes more sparse and

2311 // so the chances of a false positive rise. A character class	2391 // so the chances of a false positive rise. A character class

2312 // with multiple ranges is assumed never to be equivalent to a	2392 // with multiple ranges is assumed never to be equivalent to a

2313 // mask and compare operation.	2393 // mask and compare operation.

2314 pos->determines_perfectly = false;	2394 pos->determines_perfectly = false;

2315 CharacterRange range = ranges->at(i);	2395 uint32_t new_common_bits = (from ^ to);

2316 uint32_t new_common_bits = (range.from() ^ range.to());

2317 new_common_bits = ~SmearBitsRight(new_common_bits);	2396 new_common_bits = ~SmearBitsRight(new_common_bits);

2318 common_bits &= new_common_bits;	2397 common_bits &= new_common_bits;

2319 bits &= new_common_bits;	2398 bits &= new_common_bits;

2320 uint32_t differing_bits = (range.from() & common_bits) ^ bits;	2399 uint32_t differing_bits = (from & common_bits) ^ bits;

2321 common_bits ^= differing_bits;	2400 common_bits ^= differing_bits;

2322 bits &= common_bits;	2401 bits &= common_bits;

2323 }	2402 }

2324 pos->mask = common_bits;	2403 pos->mask = common_bits;

2325 pos->value = bits;	2404 pos->value = bits;

2326 }	2405 }

2327 characters_filled_in++;	2406 characters_filled_in++;

2328 ASSERT(characters_filled_in <= details->characters());	2407 ASSERT(characters_filled_in <= details->characters());

2329 if (characters_filled_in == details->characters()) {	2408 if (characters_filled_in == details->characters()) {

2330 return;	2409 return;

(...skipping 281 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2612 return;	2691 return;

2613 case AT_NON_BOUNDARY:	2692 case AT_NON_BOUNDARY:

2614 case AT_BOUNDARY:	2693 case AT_BOUNDARY:

2615 EmitBoundaryCheck(type_, compiler, on_success(), trace);	2694 EmitBoundaryCheck(type_, compiler, on_success(), trace);

2616 return;	2695 return;

2617 }	2696 }

2618 on_success()->Emit(compiler, trace);	2697 on_success()->Emit(compiler, trace);

2619 }	2698 }

2620	2699

2621	2700

	2701 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {

	2702 if (quick_check == NULL) return false;

	2703 if (offset >= quick_check->characters()) return false;

	2704 return quick_check->positions(offset)->determines_perfectly;

	2705 }

	2706

	2707

	2708 static void UpdateBoundsCheck(int index, int* checked_up_to) {

	2709 if (index > *checked_up_to) {

	2710 *checked_up_to = index;

	2711 }

	2712 }

	2713

	2714

2622 // We call this repeatedly to generate code for each pass over the text node.	2715 // We call this repeatedly to generate code for each pass over the text node.

2623 // The passes are in increasing order of difficulty because we hope one	2716 // The passes are in increasing order of difficulty because we hope one

2624 // of the first passes will fail in which case we are saved the work of the	2717 // of the first passes will fail in which case we are saved the work of the

2625 // later passes. for example for the case independent regexp /%[asdfghjkl]a/	2718 // later passes. for example for the case independent regexp /%[asdfghjkl]a/

2626 // we will check the '%' in the first pass, the case independent 'a' in the	2719 // we will check the '%' in the first pass, the case independent 'a' in the

2627 // second pass and the character class in the last pass.	2720 // second pass and the character class in the last pass.

2628 //	2721 //

2629 // The passes are done from right to left, so for example to test for /bar/	2722 // The passes are done from right to left, so for example to test for /bar/

2630 // we will first test for an 'r' with offset 2, then an 'a' with offset 1	2723 // we will first test for an 'r' with offset 2, then an 'a' with offset 1

2631 // and then a 'b' with offset 0. This means we can avoid the end-of-input	2724 // and then a 'b' with offset 0. This means we can avoid the end-of-input

(...skipping 24 matching lines...) Expand all Loading...
2656 int* checked_up_to) {	2749 int* checked_up_to) {

2657 RegExpMacroAssembler* assembler = compiler->macro_assembler();	2750 RegExpMacroAssembler* assembler = compiler->macro_assembler();

2658 bool ascii = compiler->ascii();	2751 bool ascii = compiler->ascii();

2659 Label* backtrack = trace->backtrack();	2752 Label* backtrack = trace->backtrack();

2660 QuickCheckDetails* quick_check = trace->quick_check_performed();	2753 QuickCheckDetails* quick_check = trace->quick_check_performed();

2661 int element_count = elms_->length();	2754 int element_count = elms_->length();

2662 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {	2755 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

2663 TextElement elm = elms_->at(i);	2756 TextElement elm = elms_->at(i);

2664 int cp_offset = trace->cp_offset() + elm.cp_offset;	2757 int cp_offset = trace->cp_offset() + elm.cp_offset;

2665 if (elm.type == TextElement::ATOM) {	2758 if (elm.type == TextElement::ATOM) {

2666 if (pass == NON_ASCII_MATCH \|\|	2759 Vector<const uc16> quarks = elm.data.u_atom->data();

2667 pass == CHARACTER_MATCH \|\|	2760 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {

2668 pass == CASE_CHARACTER_MATCH) {	2761 if (first_element_checked && i == 0 && j == 0) continue;

2669 Vector<const uc16> quarks = elm.data.u_atom->data();	2762 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;

2670 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {	2763 EmitCharacterFunction* emit_function = NULL;

2671 bool bound_checked = true; // Most ops will check their bounds.	2764 switch (pass) {

2672 if (first_element_checked && i == 0 && j == 0) continue;	2765 case NON_ASCII_MATCH:

2673 if (pass == NON_ASCII_MATCH) {

2674 ASSERT(ascii);	2766 ASSERT(ascii);

2675 if (quarks[j] > String::kMaxAsciiCharCode) {	2767 if (quarks[j] > String::kMaxAsciiCharCode) {

2676 assembler->GoTo(backtrack);	2768 assembler->GoTo(backtrack);

2677 return;	2769 return;

2678 }	2770 }

2679 } else {	2771 break;

2680 if (quick_check != NULL &&	2772 case NON_LETTER_CHARACTER_MATCH:

2681 elm.cp_offset + j < quick_check->characters() &&	2773 emit_function = &EmitAtomNonLetter;

2682 quick_check->positions(elm.cp_offset + j)->	2774 break;

2683 determines_perfectly) {	2775 case SIMPLE_CHARACTER_MATCH:

2684 continue;	2776 emit_function = &EmitSimpleCharacter;

2685 }	2777 break;

2686 if (pass == CHARACTER_MATCH) {	2778 case CASE_CHARACTER_MATCH:

2687 if (compiler->ignore_case()) {	2779 emit_function = &EmitAtomLetter;

2688 bound_checked = EmitAtomNonLetter(	2780 break;

2689 assembler,	2781 default:

2690 quarks[j],	2782 break;

2691 backtrack,	2783 }

2692 cp_offset + j,	2784 if (emit_function != NULL) {

2693 *checked_up_to < cp_offset + j,	2785 bool bound_checked = emit_function(compiler,

2694 preloaded);

2695 } else {

2696 if (!preloaded) {

2697 assembler->LoadCurrentCharacter(

2698 cp_offset + j,

2699 backtrack,

2700 *checked_up_to < cp_offset + j);

2701 }

2702 assembler->CheckNotCharacter(quarks[j], backtrack);

2703 }

2704 } else {

2705 ASSERT_EQ(pass, CASE_CHARACTER_MATCH);

2706 ASSERT(compiler->ignore_case());

2707 bound_checked = EmitAtomLetter(assembler,

2708 compiler->ascii(),

2709 quarks[j],	2786 quarks[j],

2710 backtrack,	2787 backtrack,

2711 cp_offset + j,	2788 cp_offset + j,

2712 *checked_up_to < cp_offset + j,	2789 *checked_up_to < cp_offset + j,

2713 preloaded);	2790 preloaded);

2714 }	2791 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

2715 if (bound_checked) {

2716 if (cp_offset + j > *checked_up_to) {

2717 *checked_up_to = cp_offset + j;

2718 }

2719 }

2720 }

2721 }	2792 }

2722 }	2793 }

2723 } else {	2794 } else {

2724 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);	2795 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);

2725 if (first_element_checked && i == 0) continue;

2726 if (quick_check != NULL &&

2727 elm.cp_offset < quick_check->characters() &&

2728 quick_check->positions(elm.cp_offset)->determines_perfectly) {

2729 continue;

2730 }

2731 if (pass == CHARACTER_CLASS_MATCH) {	2796 if (pass == CHARACTER_CLASS_MATCH) {

	2797 if (first_element_checked && i == 0) continue;

	2798 if (DeterminedAlready(quick_check, elm.cp_offset)) continue;

2732 RegExpCharacterClass* cc = elm.data.u_char_class;	2799 RegExpCharacterClass* cc = elm.data.u_char_class;

2733 EmitCharClass(assembler,	2800 EmitCharClass(assembler,

2734 cc,	2801 cc,

	2802 ascii,

	2803 backtrack,

2735 cp_offset,	2804 cp_offset,

2736 backtrack,

2737 *checked_up_to < cp_offset,	2805 *checked_up_to < cp_offset,

2738 ascii,

2739 preloaded);	2806 preloaded);

2740 if (cp_offset > *checked_up_to) {	2807 UpdateBoundsCheck(cp_offset, checked_up_to);

2741 *checked_up_to = cp_offset;

2742 }

2743 }	2808 }

2744 }	2809 }

2745 }	2810 }

2746 }	2811 }

2747	2812

2748	2813

2749 int TextNode::Length() {	2814 int TextNode::Length() {

2750 TextElement elm = elms_->last();	2815 TextElement elm = elms_->last();

2751 ASSERT(elm.cp_offset >= 0);	2816 ASSERT(elm.cp_offset >= 0);

2752 if (elm.type == TextElement::ATOM) {	2817 if (elm.type == TextElement::ATOM) {

2753 return elm.cp_offset + elm.data.u_atom->data().length();	2818 return elm.cp_offset + elm.data.u_atom->data().length();

2754 } else {	2819 } else {

2755 return elm.cp_offset + 1;	2820 return elm.cp_offset + 1;

2756 }	2821 }

2757 }	2822 }

2758	2823

2759	2824

	2825 bool TextNode::SkipPass(int int_pass, bool ignore_case) {

	2826 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);

	2827 if (ignore_case) {

	2828 return pass == SIMPLE_CHARACTER_MATCH;

	2829 } else {

	2830 return pass == NON_LETTER_CHARACTER_MATCH \|\| pass == CASE_CHARACTER_MATCH;

	2831 }

	2832 }

	2833

	2834

2760 // This generates the code to match a text node. A text node can contain	2835 // This generates the code to match a text node. A text node can contain

2761 // straight character sequences (possibly to be matched in a case-independent	2836 // straight character sequences (possibly to be matched in a case-independent

2762 // way) and character classes. For efficiency we do not do this in a single	2837 // way) and character classes. For efficiency we do not do this in a single

2763 // pass from left to right. Instead we pass over the text node several times,	2838 // pass from left to right. Instead we pass over the text node several times,

2764 // emitting code for some character positions every time. See the comment on	2839 // emitting code for some character positions every time. See the comment on

2765 // TextEmitPass for details.	2840 // TextEmitPass for details.

2766 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {	2841 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

2767 LimitResult limit_result = LimitVersions(compiler, trace);	2842 LimitResult limit_result = LimitVersions(compiler, trace);

2768 if (limit_result == DONE) return;	2843 if (limit_result == DONE) return;

2769 ASSERT(limit_result == CONTINUE);	2844 ASSERT(limit_result == CONTINUE);

2770	2845

2771 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {	2846 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {

2772 compiler->SetRegExpTooBig();	2847 compiler->SetRegExpTooBig();

2773 return;	2848 return;

2774 }	2849 }

2775	2850

2776 if (compiler->ascii()) {	2851 if (compiler->ascii()) {

2777 int dummy = 0;	2852 int dummy = 0;

2778 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);	2853 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);

2779 }	2854 }

2780	2855

2781 bool first_elt_done = false;	2856 bool first_elt_done = false;

2782 int bound_checked_to = trace->cp_offset() - 1;	2857 int bound_checked_to = trace->cp_offset() - 1;

2783 bound_checked_to += trace->bound_checked_up_to();	2858 bound_checked_to += trace->bound_checked_up_to();

2784	2859

2785 // If a character is preloaded into the current character register then	2860 // If a character is preloaded into the current character register then

2786 // check that now.	2861 // check that now.

2787 if (trace->characters_preloaded() == 1) {	2862 if (trace->characters_preloaded() == 1) {

2788 TextEmitPass(compiler,	2863 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {

2789 CHARACTER_MATCH,	2864 if (!SkipPass(pass, compiler->ignore_case())) {

2790 true,	2865 TextEmitPass(compiler,

2791 trace,	2866 static_cast<TextEmitPassType>(pass),

2792 false,	2867 true,

2793 &bound_checked_to);	2868 trace,

2794 if (compiler->ignore_case()) {	2869 false,

2795 TextEmitPass(compiler,	2870 &bound_checked_to);

2796 CASE_CHARACTER_MATCH,	2871 }

2797 true,

2798 trace,

2799 false,

2800 &bound_checked_to);

2801 }	2872 }

2802 TextEmitPass(compiler,

2803 CHARACTER_CLASS_MATCH,

2804 true,

2805 trace,

2806 false,

2807 &bound_checked_to);

2808 first_elt_done = true;	2873 first_elt_done = true;

2809 }	2874 }

2810	2875

2811 TextEmitPass(compiler,	2876 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {

2812 CHARACTER_MATCH,	2877 if (!SkipPass(pass, compiler->ignore_case())) {

2813 false,	2878 TextEmitPass(compiler,

2814 trace,	2879 static_cast<TextEmitPassType>(pass),

2815 first_elt_done,	2880 false,

2816 &bound_checked_to);	2881 trace,

2817 if (compiler->ignore_case()) {	2882 first_elt_done,

2818 TextEmitPass(compiler,	2883 &bound_checked_to);

2819 CASE_CHARACTER_MATCH,	2884 }

2820 false,

2821 trace,

2822 first_elt_done,

2823 &bound_checked_to);

2824 }	2885 }

2825 TextEmitPass(compiler,

2826 CHARACTER_CLASS_MATCH,

2827 false,

2828 trace,

2829 first_elt_done,

2830 &bound_checked_to);

2831	2886

2832 Trace successor_trace(*trace);	2887 Trace successor_trace(*trace);

2833 successor_trace.set_at_start(false);	2888 successor_trace.set_at_start(false);

2834 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);	2889 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);

2835 RecursionCheck rc(compiler);	2890 RecursionCheck rc(compiler);

2836 on_success()->Emit(compiler, &successor_trace);	2891 on_success()->Emit(compiler, &successor_trace);

2837 }	2892 }

2838	2893

2839	2894

2840 void Trace::InvalidateCurrentCharacter() {	2895 void Trace::InvalidateCurrentCharacter() {

(...skipping 2043 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4884 EmbeddedVector<byte, 1024> codes;	4939 EmbeddedVector<byte, 1024> codes;

4885 RegExpMacroAssemblerIrregexp macro_assembler(codes);	4940 RegExpMacroAssemblerIrregexp macro_assembler(codes);

4886 return compiler.Assemble(&macro_assembler,	4941 return compiler.Assemble(&macro_assembler,

4887 node,	4942 node,

4888 data->capture_count,	4943 data->capture_count,

4889 pattern);	4944 pattern);

4890 }	4945 }

4891	4946

4892	4947

4893 }} // namespace v8::internal	4948 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »