Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: src/jsregexp.cc

Issue 20499: Experimental: merge from bleeding edge 1298:1307. Port the eval... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/
Patch Set: '' Created 11 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1721 matching lines...) Expand 10 before | Expand all | Expand 10 after
1732 trace->backtrack()); 1732 trace->backtrack());
1733 break; 1733 break;
1734 } 1734 }
1735 } 1735 }
1736 1736
1737 1737
1738 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; 1738 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
1739 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; 1739 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
1740 1740
1741 1741
1742 // Returns the number of characters in the equivalence class, omitting those
1743 // that cannot occur in the source string because it is ASCII.
1744 static int GetCaseIndependentLetters(uc16 character,
1745 bool ascii_subject,
1746 unibrow::uchar* letters) {
1747 int length = uncanonicalize.get(character, '\0', letters);
1748 // Unibrow returns 0 or 1 for characters where case independependence is
1749 // trivial.
1750 if (length == 0) {
1751 letters[0] = character;
1752 length = 1;
1753 }
1754 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {
1755 return length;
1756 }
1757 // The standard requires that non-ASCII characters cannot have ASCII
1758 // character codes in their equivalence class.
1759 return 0;
1760 }
1761
1762
1763 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler,
1764 uc16 c,
1765 Label* on_failure,
1766 int cp_offset,
1767 bool check,
1768 bool preloaded) {
1769 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1770 bool bound_checked = false;
1771 if (!preloaded) {
1772 assembler->LoadCurrentCharacter(
1773 cp_offset,
1774 on_failure,
1775 check);
1776 bound_checked = true;
1777 }
1778 assembler->CheckNotCharacter(c, on_failure);
1779 return bound_checked;
1780 }
1781
1782
1742 // Only emits non-letters (things that don't have case). Only used for case 1783 // Only emits non-letters (things that don't have case). Only used for case
1743 // independent matches. 1784 // independent matches.
1744 static inline bool EmitAtomNonLetter( 1785 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler,
1745 RegExpMacroAssembler* macro_assembler, 1786 uc16 c,
1746 uc16 c, 1787 Label* on_failure,
1747 Label* on_failure, 1788 int cp_offset,
1748 int cp_offset, 1789 bool check,
1749 bool check, 1790 bool preloaded) {
1750 bool preloaded) { 1791 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1792 bool ascii = compiler->ascii();
1751 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1793 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1752 int length = uncanonicalize.get(c, '\0', chars); 1794 int length = GetCaseIndependentLetters(c, ascii, chars);
1795 if (length < 1) {
1796 // This can't match. Must be an ASCII subject and a non-ASCII character.
1797 // We do not need to do anything since the ASCII pass already handled this.
1798 return false; // Bounds not checked.
1799 }
1753 bool checked = false; 1800 bool checked = false;
1754 if (length <= 1) { 1801 // We handle the length > 1 case in a later pass.
1802 if (length == 1) {
1803 if (ascii && c > String::kMaxAsciiCharCodeU) {
1804 // Can't match - see above.
1805 return false; // Bounds not checked.
1806 }
1755 if (!preloaded) { 1807 if (!preloaded) {
1756 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1808 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1757 checked = check; 1809 checked = check;
1758 } 1810 }
1759 macro_assembler->CheckNotCharacter(c, on_failure); 1811 macro_assembler->CheckNotCharacter(c, on_failure);
1760 } 1812 }
1761 return checked; 1813 return checked;
1762 } 1814 }
1763 1815
1764 1816
(...skipping 29 matching lines...) Expand all
1794 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 1846 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1795 diff, 1847 diff,
1796 mask, 1848 mask,
1797 on_failure); 1849 on_failure);
1798 return true; 1850 return true;
1799 } 1851 }
1800 return false; 1852 return false;
1801 } 1853 }
1802 1854
1803 1855
1856 typedef bool EmitCharacterFunction(RegExpCompiler* compiler,
1857 uc16 c,
1858 Label* on_failure,
1859 int cp_offset,
1860 bool check,
1861 bool preloaded);
1862
1804 // Only emits letters (things that have case). Only used for case independent 1863 // Only emits letters (things that have case). Only used for case independent
1805 // matches. 1864 // matches.
1806 static inline bool EmitAtomLetter( 1865 static inline bool EmitAtomLetter(RegExpCompiler* compiler,
1807 RegExpMacroAssembler* macro_assembler, 1866 uc16 c,
1808 bool ascii, 1867 Label* on_failure,
1809 uc16 c, 1868 int cp_offset,
1810 Label* on_failure, 1869 bool check,
1811 int cp_offset, 1870 bool preloaded) {
1812 bool check, 1871 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1813 bool preloaded) { 1872 bool ascii = compiler->ascii();
1814 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1873 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1815 int length = uncanonicalize.get(c, '\0', chars); 1874 int length = GetCaseIndependentLetters(c, ascii, chars);
1816 if (length <= 1) return false; 1875 if (length <= 1) return false;
1817 // We may not need to check against the end of the input string 1876 // We may not need to check against the end of the input string
1818 // if this character lies before a character that matched. 1877 // if this character lies before a character that matched.
1819 if (!preloaded) { 1878 if (!preloaded) {
1820 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1879 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1821 } 1880 }
1822 Label ok; 1881 Label ok;
1823 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1882 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1824 switch (length) { 1883 switch (length) {
1825 case 2: { 1884 case 2: {
(...skipping 21 matching lines...) Expand all
1847 default: 1906 default:
1848 UNREACHABLE(); 1907 UNREACHABLE();
1849 break; 1908 break;
1850 } 1909 }
1851 return true; 1910 return true;
1852 } 1911 }
1853 1912
1854 1913
1855 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1914 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1856 RegExpCharacterClass* cc, 1915 RegExpCharacterClass* cc,
1916 bool ascii,
1917 Label* on_failure,
1857 int cp_offset, 1918 int cp_offset,
1858 Label* on_failure,
1859 bool check_offset, 1919 bool check_offset,
1860 bool ascii,
1861 bool preloaded) { 1920 bool preloaded) {
1862 if (cc->is_standard() && 1921 if (cc->is_standard() &&
1863 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), 1922 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
1864 cp_offset, 1923 cp_offset,
1865 check_offset, 1924 check_offset,
1866 on_failure)) { 1925 on_failure)) {
1867 return; 1926 return;
1868 } 1927 }
1869 1928
1870 ZoneList<CharacterRange>* ranges = cc->ranges(); 1929 ZoneList<CharacterRange>* ranges = cc->ranges();
(...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after
2231 for (int i = 0; i < characters && i < quarks.length(); i++) { 2290 for (int i = 0; i < characters && i < quarks.length(); i++) {
2232 QuickCheckDetails::Position* pos = 2291 QuickCheckDetails::Position* pos =
2233 details->positions(characters_filled_in); 2292 details->positions(characters_filled_in);
2234 uc16 c = quarks[i]; 2293 uc16 c = quarks[i];
2235 if (c > char_mask) { 2294 if (c > char_mask) {
2236 // If we expect a non-ASCII character from an ASCII string, 2295 // If we expect a non-ASCII character from an ASCII string,
2237 // there is no way we can match. Not even case independent 2296 // there is no way we can match. Not even case independent
2238 // matching can turn an ASCII character into non-ASCII or 2297 // matching can turn an ASCII character into non-ASCII or
2239 // vice versa. 2298 // vice versa.
2240 details->set_cannot_match(); 2299 details->set_cannot_match();
2300 pos->determines_perfectly = false;
2241 return; 2301 return;
2242 } 2302 }
2243 if (compiler->ignore_case()) { 2303 if (compiler->ignore_case()) {
2244 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 2304 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2245 int length = uncanonicalize.get(c, '\0', chars); 2305 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars);
2246 if (length < 2) { 2306 ASSERT(length != 0); // Can only happen if c > char_mask (see above).
2307 if (length == 1) {
2247 // This letter has no case equivalents, so it's nice and simple 2308 // This letter has no case equivalents, so it's nice and simple
2248 // and the mask-compare will determine definitely whether we have 2309 // and the mask-compare will determine definitely whether we have
2249 // a match at this character position. 2310 // a match at this character position.
2250 pos->mask = char_mask; 2311 pos->mask = char_mask;
2251 pos->value = c; 2312 pos->value = c;
2252 pos->determines_perfectly = true; 2313 pos->determines_perfectly = true;
2253 } else { 2314 } else {
2254 uint32_t common_bits = char_mask; 2315 uint32_t common_bits = char_mask;
2255 uint32_t bits = chars[0]; 2316 uint32_t bits = chars[0];
2256 for (int j = 1; j < length; j++) { 2317 for (int j = 1; j < length; j++) {
(...skipping 24 matching lines...) Expand all
2281 ASSERT(characters_filled_in <= details->characters()); 2342 ASSERT(characters_filled_in <= details->characters());
2282 if (characters_filled_in == details->characters()) { 2343 if (characters_filled_in == details->characters()) {
2283 return; 2344 return;
2284 } 2345 }
2285 } 2346 }
2286 } else { 2347 } else {
2287 QuickCheckDetails::Position* pos = 2348 QuickCheckDetails::Position* pos =
2288 details->positions(characters_filled_in); 2349 details->positions(characters_filled_in);
2289 RegExpCharacterClass* tree = elm.data.u_char_class; 2350 RegExpCharacterClass* tree = elm.data.u_char_class;
2290 ZoneList<CharacterRange>* ranges = tree->ranges(); 2351 ZoneList<CharacterRange>* ranges = tree->ranges();
2291 CharacterRange range = ranges->at(0);
2292 if (tree->is_negated()) { 2352 if (tree->is_negated()) {
2293 // A quick check uses multi-character mask and compare. There is no 2353 // A quick check uses multi-character mask and compare. There is no
2294 // useful way to incorporate a negative char class into this scheme 2354 // useful way to incorporate a negative char class into this scheme
2295 // so we just conservatively create a mask and value that will always 2355 // so we just conservatively create a mask and value that will always
2296 // succeed. 2356 // succeed.
2297 pos->mask = 0; 2357 pos->mask = 0;
2298 pos->value = 0; 2358 pos->value = 0;
2299 } else { 2359 } else {
2300 uint32_t differing_bits = (range.from() ^ range.to()); 2360 int first_range = 0;
2361 while (ranges->at(first_range).from() > char_mask) {
2362 first_range++;
2363 if (first_range == ranges->length()) {
2364 details->set_cannot_match();
2365 pos->determines_perfectly = false;
2366 return;
2367 }
2368 }
2369 CharacterRange range = ranges->at(first_range);
2370 uc16 from = range.from();
2371 uc16 to = range.to();
2372 if (to > char_mask) {
2373 to = char_mask;
2374 }
2375 uint32_t differing_bits = (from ^ to);
2301 // A mask and compare is only perfect if the differing bits form a 2376 // A mask and compare is only perfect if the differing bits form a
2302 // number like 00011111 with one single block of trailing 1s. 2377 // number like 00011111 with one single block of trailing 1s.
2303 if ((differing_bits & (differing_bits + 1)) == 0) { 2378 if ((differing_bits & (differing_bits + 1)) == 0) {
2304 pos->determines_perfectly = true; 2379 pos->determines_perfectly = true;
2305 } 2380 }
2306 uint32_t common_bits = ~SmearBitsRight(differing_bits); 2381 uint32_t common_bits = ~SmearBitsRight(differing_bits);
2307 uint32_t bits = (range.from() & common_bits); 2382 uint32_t bits = (from & common_bits);
2308 for (int i = 1; i < ranges->length(); i++) { 2383 for (int i = first_range + 1; i < ranges->length(); i++) {
2384 CharacterRange range = ranges->at(i);
2385 uc16 from = range.from();
2386 uc16 to = range.to();
2387 if (from > char_mask) continue;
2388 if (to > char_mask) to = char_mask;
2309 // Here we are combining more ranges into the mask and compare 2389 // Here we are combining more ranges into the mask and compare
2310 // value. With each new range the mask becomes more sparse and 2390 // value. With each new range the mask becomes more sparse and
2311 // so the chances of a false positive rise. A character class 2391 // so the chances of a false positive rise. A character class
2312 // with multiple ranges is assumed never to be equivalent to a 2392 // with multiple ranges is assumed never to be equivalent to a
2313 // mask and compare operation. 2393 // mask and compare operation.
2314 pos->determines_perfectly = false; 2394 pos->determines_perfectly = false;
2315 CharacterRange range = ranges->at(i); 2395 uint32_t new_common_bits = (from ^ to);
2316 uint32_t new_common_bits = (range.from() ^ range.to());
2317 new_common_bits = ~SmearBitsRight(new_common_bits); 2396 new_common_bits = ~SmearBitsRight(new_common_bits);
2318 common_bits &= new_common_bits; 2397 common_bits &= new_common_bits;
2319 bits &= new_common_bits; 2398 bits &= new_common_bits;
2320 uint32_t differing_bits = (range.from() & common_bits) ^ bits; 2399 uint32_t differing_bits = (from & common_bits) ^ bits;
2321 common_bits ^= differing_bits; 2400 common_bits ^= differing_bits;
2322 bits &= common_bits; 2401 bits &= common_bits;
2323 } 2402 }
2324 pos->mask = common_bits; 2403 pos->mask = common_bits;
2325 pos->value = bits; 2404 pos->value = bits;
2326 } 2405 }
2327 characters_filled_in++; 2406 characters_filled_in++;
2328 ASSERT(characters_filled_in <= details->characters()); 2407 ASSERT(characters_filled_in <= details->characters());
2329 if (characters_filled_in == details->characters()) { 2408 if (characters_filled_in == details->characters()) {
2330 return; 2409 return;
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after
2612 return; 2691 return;
2613 case AT_NON_BOUNDARY: 2692 case AT_NON_BOUNDARY:
2614 case AT_BOUNDARY: 2693 case AT_BOUNDARY:
2615 EmitBoundaryCheck(type_, compiler, on_success(), trace); 2694 EmitBoundaryCheck(type_, compiler, on_success(), trace);
2616 return; 2695 return;
2617 } 2696 }
2618 on_success()->Emit(compiler, trace); 2697 on_success()->Emit(compiler, trace);
2619 } 2698 }
2620 2699
2621 2700
2701 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
2702 if (quick_check == NULL) return false;
2703 if (offset >= quick_check->characters()) return false;
2704 return quick_check->positions(offset)->determines_perfectly;
2705 }
2706
2707
2708 static void UpdateBoundsCheck(int index, int* checked_up_to) {
2709 if (index > *checked_up_to) {
2710 *checked_up_to = index;
2711 }
2712 }
2713
2714
2622 // We call this repeatedly to generate code for each pass over the text node. 2715 // We call this repeatedly to generate code for each pass over the text node.
2623 // The passes are in increasing order of difficulty because we hope one 2716 // The passes are in increasing order of difficulty because we hope one
2624 // of the first passes will fail in which case we are saved the work of the 2717 // of the first passes will fail in which case we are saved the work of the
2625 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ 2718 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
2626 // we will check the '%' in the first pass, the case independent 'a' in the 2719 // we will check the '%' in the first pass, the case independent 'a' in the
2627 // second pass and the character class in the last pass. 2720 // second pass and the character class in the last pass.
2628 // 2721 //
2629 // The passes are done from right to left, so for example to test for /bar/ 2722 // The passes are done from right to left, so for example to test for /bar/
2630 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 2723 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
2631 // and then a 'b' with offset 0. This means we can avoid the end-of-input 2724 // and then a 'b' with offset 0. This means we can avoid the end-of-input
(...skipping 24 matching lines...) Expand all
2656 int* checked_up_to) { 2749 int* checked_up_to) {
2657 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2750 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2658 bool ascii = compiler->ascii(); 2751 bool ascii = compiler->ascii();
2659 Label* backtrack = trace->backtrack(); 2752 Label* backtrack = trace->backtrack();
2660 QuickCheckDetails* quick_check = trace->quick_check_performed(); 2753 QuickCheckDetails* quick_check = trace->quick_check_performed();
2661 int element_count = elms_->length(); 2754 int element_count = elms_->length();
2662 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 2755 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2663 TextElement elm = elms_->at(i); 2756 TextElement elm = elms_->at(i);
2664 int cp_offset = trace->cp_offset() + elm.cp_offset; 2757 int cp_offset = trace->cp_offset() + elm.cp_offset;
2665 if (elm.type == TextElement::ATOM) { 2758 if (elm.type == TextElement::ATOM) {
2666 if (pass == NON_ASCII_MATCH || 2759 Vector<const uc16> quarks = elm.data.u_atom->data();
2667 pass == CHARACTER_MATCH || 2760 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2668 pass == CASE_CHARACTER_MATCH) { 2761 if (first_element_checked && i == 0 && j == 0) continue;
2669 Vector<const uc16> quarks = elm.data.u_atom->data(); 2762 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;
2670 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 2763 EmitCharacterFunction* emit_function = NULL;
2671 bool bound_checked = true; // Most ops will check their bounds. 2764 switch (pass) {
2672 if (first_element_checked && i == 0 && j == 0) continue; 2765 case NON_ASCII_MATCH:
2673 if (pass == NON_ASCII_MATCH) {
2674 ASSERT(ascii); 2766 ASSERT(ascii);
2675 if (quarks[j] > String::kMaxAsciiCharCode) { 2767 if (quarks[j] > String::kMaxAsciiCharCode) {
2676 assembler->GoTo(backtrack); 2768 assembler->GoTo(backtrack);
2677 return; 2769 return;
2678 } 2770 }
2679 } else { 2771 break;
2680 if (quick_check != NULL && 2772 case NON_LETTER_CHARACTER_MATCH:
2681 elm.cp_offset + j < quick_check->characters() && 2773 emit_function = &EmitAtomNonLetter;
2682 quick_check->positions(elm.cp_offset + j)-> 2774 break;
2683 determines_perfectly) { 2775 case SIMPLE_CHARACTER_MATCH:
2684 continue; 2776 emit_function = &EmitSimpleCharacter;
2685 } 2777 break;
2686 if (pass == CHARACTER_MATCH) { 2778 case CASE_CHARACTER_MATCH:
2687 if (compiler->ignore_case()) { 2779 emit_function = &EmitAtomLetter;
2688 bound_checked = EmitAtomNonLetter( 2780 break;
2689 assembler, 2781 default:
2690 quarks[j], 2782 break;
2691 backtrack, 2783 }
2692 cp_offset + j, 2784 if (emit_function != NULL) {
2693 *checked_up_to < cp_offset + j, 2785 bool bound_checked = emit_function(compiler,
2694 preloaded);
2695 } else {
2696 if (!preloaded) {
2697 assembler->LoadCurrentCharacter(
2698 cp_offset + j,
2699 backtrack,
2700 *checked_up_to < cp_offset + j);
2701 }
2702 assembler->CheckNotCharacter(quarks[j], backtrack);
2703 }
2704 } else {
2705 ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
2706 ASSERT(compiler->ignore_case());
2707 bound_checked = EmitAtomLetter(assembler,
2708 compiler->ascii(),
2709 quarks[j], 2786 quarks[j],
2710 backtrack, 2787 backtrack,
2711 cp_offset + j, 2788 cp_offset + j,
2712 *checked_up_to < cp_offset + j, 2789 *checked_up_to < cp_offset + j,
2713 preloaded); 2790 preloaded);
2714 } 2791 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2715 if (bound_checked) {
2716 if (cp_offset + j > *checked_up_to) {
2717 *checked_up_to = cp_offset + j;
2718 }
2719 }
2720 }
2721 } 2792 }
2722 } 2793 }
2723 } else { 2794 } else {
2724 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 2795 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
2725 if (first_element_checked && i == 0) continue;
2726 if (quick_check != NULL &&
2727 elm.cp_offset < quick_check->characters() &&
2728 quick_check->positions(elm.cp_offset)->determines_perfectly) {
2729 continue;
2730 }
2731 if (pass == CHARACTER_CLASS_MATCH) { 2796 if (pass == CHARACTER_CLASS_MATCH) {
2797 if (first_element_checked && i == 0) continue;
2798 if (DeterminedAlready(quick_check, elm.cp_offset)) continue;
2732 RegExpCharacterClass* cc = elm.data.u_char_class; 2799 RegExpCharacterClass* cc = elm.data.u_char_class;
2733 EmitCharClass(assembler, 2800 EmitCharClass(assembler,
2734 cc, 2801 cc,
2802 ascii,
2803 backtrack,
2735 cp_offset, 2804 cp_offset,
2736 backtrack,
2737 *checked_up_to < cp_offset, 2805 *checked_up_to < cp_offset,
2738 ascii,
2739 preloaded); 2806 preloaded);
2740 if (cp_offset > *checked_up_to) { 2807 UpdateBoundsCheck(cp_offset, checked_up_to);
2741 *checked_up_to = cp_offset;
2742 }
2743 } 2808 }
2744 } 2809 }
2745 } 2810 }
2746 } 2811 }
2747 2812
2748 2813
2749 int TextNode::Length() { 2814 int TextNode::Length() {
2750 TextElement elm = elms_->last(); 2815 TextElement elm = elms_->last();
2751 ASSERT(elm.cp_offset >= 0); 2816 ASSERT(elm.cp_offset >= 0);
2752 if (elm.type == TextElement::ATOM) { 2817 if (elm.type == TextElement::ATOM) {
2753 return elm.cp_offset + elm.data.u_atom->data().length(); 2818 return elm.cp_offset + elm.data.u_atom->data().length();
2754 } else { 2819 } else {
2755 return elm.cp_offset + 1; 2820 return elm.cp_offset + 1;
2756 } 2821 }
2757 } 2822 }
2758 2823
2759 2824
2825 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
2826 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
2827 if (ignore_case) {
2828 return pass == SIMPLE_CHARACTER_MATCH;
2829 } else {
2830 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
2831 }
2832 }
2833
2834
2760 // This generates the code to match a text node. A text node can contain 2835 // This generates the code to match a text node. A text node can contain
2761 // straight character sequences (possibly to be matched in a case-independent 2836 // straight character sequences (possibly to be matched in a case-independent
2762 // way) and character classes. For efficiency we do not do this in a single 2837 // way) and character classes. For efficiency we do not do this in a single
2763 // pass from left to right. Instead we pass over the text node several times, 2838 // pass from left to right. Instead we pass over the text node several times,
2764 // emitting code for some character positions every time. See the comment on 2839 // emitting code for some character positions every time. See the comment on
2765 // TextEmitPass for details. 2840 // TextEmitPass for details.
2766 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2841 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2767 LimitResult limit_result = LimitVersions(compiler, trace); 2842 LimitResult limit_result = LimitVersions(compiler, trace);
2768 if (limit_result == DONE) return; 2843 if (limit_result == DONE) return;
2769 ASSERT(limit_result == CONTINUE); 2844 ASSERT(limit_result == CONTINUE);
2770 2845
2771 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { 2846 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2772 compiler->SetRegExpTooBig(); 2847 compiler->SetRegExpTooBig();
2773 return; 2848 return;
2774 } 2849 }
2775 2850
2776 if (compiler->ascii()) { 2851 if (compiler->ascii()) {
2777 int dummy = 0; 2852 int dummy = 0;
2778 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy); 2853 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
2779 } 2854 }
2780 2855
2781 bool first_elt_done = false; 2856 bool first_elt_done = false;
2782 int bound_checked_to = trace->cp_offset() - 1; 2857 int bound_checked_to = trace->cp_offset() - 1;
2783 bound_checked_to += trace->bound_checked_up_to(); 2858 bound_checked_to += trace->bound_checked_up_to();
2784 2859
2785 // If a character is preloaded into the current character register then 2860 // If a character is preloaded into the current character register then
2786 // check that now. 2861 // check that now.
2787 if (trace->characters_preloaded() == 1) { 2862 if (trace->characters_preloaded() == 1) {
2788 TextEmitPass(compiler, 2863 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2789 CHARACTER_MATCH, 2864 if (!SkipPass(pass, compiler->ignore_case())) {
2790 true, 2865 TextEmitPass(compiler,
2791 trace, 2866 static_cast<TextEmitPassType>(pass),
2792 false, 2867 true,
2793 &bound_checked_to); 2868 trace,
2794 if (compiler->ignore_case()) { 2869 false,
2795 TextEmitPass(compiler, 2870 &bound_checked_to);
2796 CASE_CHARACTER_MATCH, 2871 }
2797 true,
2798 trace,
2799 false,
2800 &bound_checked_to);
2801 } 2872 }
2802 TextEmitPass(compiler,
2803 CHARACTER_CLASS_MATCH,
2804 true,
2805 trace,
2806 false,
2807 &bound_checked_to);
2808 first_elt_done = true; 2873 first_elt_done = true;
2809 } 2874 }
2810 2875
2811 TextEmitPass(compiler, 2876 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
2812 CHARACTER_MATCH, 2877 if (!SkipPass(pass, compiler->ignore_case())) {
2813 false, 2878 TextEmitPass(compiler,
2814 trace, 2879 static_cast<TextEmitPassType>(pass),
2815 first_elt_done, 2880 false,
2816 &bound_checked_to); 2881 trace,
2817 if (compiler->ignore_case()) { 2882 first_elt_done,
2818 TextEmitPass(compiler, 2883 &bound_checked_to);
2819 CASE_CHARACTER_MATCH, 2884 }
2820 false,
2821 trace,
2822 first_elt_done,
2823 &bound_checked_to);
2824 } 2885 }
2825 TextEmitPass(compiler,
2826 CHARACTER_CLASS_MATCH,
2827 false,
2828 trace,
2829 first_elt_done,
2830 &bound_checked_to);
2831 2886
2832 Trace successor_trace(*trace); 2887 Trace successor_trace(*trace);
2833 successor_trace.set_at_start(false); 2888 successor_trace.set_at_start(false);
2834 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); 2889 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
2835 RecursionCheck rc(compiler); 2890 RecursionCheck rc(compiler);
2836 on_success()->Emit(compiler, &successor_trace); 2891 on_success()->Emit(compiler, &successor_trace);
2837 } 2892 }
2838 2893
2839 2894
2840 void Trace::InvalidateCurrentCharacter() { 2895 void Trace::InvalidateCurrentCharacter() {
(...skipping 2043 matching lines...) Expand 10 before | Expand all | Expand 10 after
4884 EmbeddedVector<byte, 1024> codes; 4939 EmbeddedVector<byte, 1024> codes;
4885 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4940 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4886 return compiler.Assemble(&macro_assembler, 4941 return compiler.Assemble(&macro_assembler,
4887 node, 4942 node,
4888 data->capture_count, 4943 data->capture_count,
4889 pattern); 4944 pattern);
4890 } 4945 }
4891 4946
4892 4947
4893 }} // namespace v8::internal 4948 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698