OLD | NEW |
---|---|
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 944 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
955 rc = false; | 955 rc = false; |
956 break; | 956 break; |
957 #endif | 957 #endif |
958 } | 958 } |
959 case RegExpMacroAssembler::kBytecodeImplementation: { | 959 case RegExpMacroAssembler::kBytecodeImplementation: { |
960 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | 960 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
961 offsets_vector[i] = -1; | 961 offsets_vector[i] = -1; |
962 } | 962 } |
963 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); | 963 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); |
964 | 964 |
965 Handle<String> two_byte_subject = CachedStringToTwoByte(subject); | 965 if (!subject->IsFlat(StringShape(*subject))) { |
966 FlattenString(subject); | |
Lasse Reichstein
2008/12/09 07:43:07
The string is also flattened in the IA32 branch (l
| |
967 } | |
966 | 968 |
967 rc = IrregexpInterpreter::Match(byte_codes, | 969 rc = IrregexpInterpreter::Match(byte_codes, |
968 two_byte_subject, | 970 subject, |
969 offsets_vector, | 971 offsets_vector, |
970 previous_index); | 972 previous_index); |
971 break; | 973 break; |
972 } | 974 } |
973 case RegExpMacroAssembler::kARMImplementation: | 975 case RegExpMacroAssembler::kARMImplementation: |
974 default: | 976 default: |
975 UNREACHABLE(); | 977 UNREACHABLE(); |
976 rc = false; | 978 rc = false; |
977 break; | 979 break; |
978 } | 980 } |
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1184 table_ = new DispatchTable(); | 1186 table_ = new DispatchTable(); |
1185 DispatchTableConstructor cons(table_, ignore_case); | 1187 DispatchTableConstructor cons(table_, ignore_case); |
1186 cons.BuildTable(this); | 1188 cons.BuildTable(this); |
1187 } | 1189 } |
1188 return table_; | 1190 return table_; |
1189 } | 1191 } |
1190 | 1192 |
1191 | 1193 |
1192 class RegExpCompiler { | 1194 class RegExpCompiler { |
1193 public: | 1195 public: |
1194 RegExpCompiler(int capture_count, bool ignore_case); | 1196 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); |
1195 | 1197 |
1196 int AllocateRegister() { return next_register_++; } | 1198 int AllocateRegister() { return next_register_++; } |
1197 | 1199 |
1198 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 1200 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
1199 RegExpNode* start, | 1201 RegExpNode* start, |
1200 int capture_count, | 1202 int capture_count, |
1201 Handle<String> pattern); | 1203 Handle<String> pattern); |
1202 | 1204 |
1203 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 1205 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
1204 | 1206 |
1205 static const int kImplementationOffset = 0; | 1207 static const int kImplementationOffset = 0; |
1206 static const int kNumberOfRegistersOffset = 0; | 1208 static const int kNumberOfRegistersOffset = 0; |
1207 static const int kCodeOffset = 1; | 1209 static const int kCodeOffset = 1; |
1208 | 1210 |
1209 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 1211 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
1210 EndNode* accept() { return accept_; } | 1212 EndNode* accept() { return accept_; } |
1211 | 1213 |
1212 static const int kMaxRecursion = 100; | 1214 static const int kMaxRecursion = 100; |
1213 inline int recursion_depth() { return recursion_depth_; } | 1215 inline int recursion_depth() { return recursion_depth_; } |
1214 inline void IncrementRecursionDepth() { recursion_depth_++; } | 1216 inline void IncrementRecursionDepth() { recursion_depth_++; } |
1215 inline void DecrementRecursionDepth() { recursion_depth_--; } | 1217 inline void DecrementRecursionDepth() { recursion_depth_--; } |
1216 | 1218 |
1217 inline bool ignore_case() { return ignore_case_; } | 1219 inline bool ignore_case() { return ignore_case_; } |
1220 inline bool ascii() { return ascii_; } | |
1218 | 1221 |
1219 private: | 1222 private: |
1220 EndNode* accept_; | 1223 EndNode* accept_; |
1221 int next_register_; | 1224 int next_register_; |
1222 List<RegExpNode*>* work_list_; | 1225 List<RegExpNode*>* work_list_; |
1223 int recursion_depth_; | 1226 int recursion_depth_; |
1224 RegExpMacroAssembler* macro_assembler_; | 1227 RegExpMacroAssembler* macro_assembler_; |
1225 bool ignore_case_; | 1228 bool ignore_case_; |
1229 bool ascii_; | |
1226 }; | 1230 }; |
1227 | 1231 |
1228 | 1232 |
1229 class RecursionCheck { | 1233 class RecursionCheck { |
1230 public: | 1234 public: |
1231 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 1235 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
1232 compiler->IncrementRecursionDepth(); | 1236 compiler->IncrementRecursionDepth(); |
1233 } | 1237 } |
1234 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 1238 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
1235 private: | 1239 private: |
1236 RegExpCompiler* compiler_; | 1240 RegExpCompiler* compiler_; |
1237 }; | 1241 }; |
1238 | 1242 |
1239 | 1243 |
1240 // Attempts to compile the regexp using an Irregexp code generator. Returns | 1244 // Attempts to compile the regexp using an Irregexp code generator. Returns |
1241 // a fixed array or a null handle depending on whether it succeeded. | 1245 // a fixed array or a null handle depending on whether it succeeded. |
1242 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) | 1246 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
1243 : next_register_(2 * (capture_count + 1)), | 1247 : next_register_(2 * (capture_count + 1)), |
1244 work_list_(NULL), | 1248 work_list_(NULL), |
1245 recursion_depth_(0), | 1249 recursion_depth_(0), |
1246 ignore_case_(ignore_case) { | 1250 ignore_case_(ignore_case), |
1251 ascii_(ascii) { | |
1247 accept_ = new EndNode(EndNode::ACCEPT); | 1252 accept_ = new EndNode(EndNode::ACCEPT); |
1248 } | 1253 } |
1249 | 1254 |
1250 | 1255 |
1251 Handle<FixedArray> RegExpCompiler::Assemble( | 1256 Handle<FixedArray> RegExpCompiler::Assemble( |
1252 RegExpMacroAssembler* macro_assembler, | 1257 RegExpMacroAssembler* macro_assembler, |
1253 RegExpNode* start, | 1258 RegExpNode* start, |
1254 int capture_count, | 1259 int capture_count, |
1255 Handle<String> pattern) { | 1260 Handle<String> pattern) { |
1256 #ifdef DEBUG | 1261 #ifdef DEBUG |
(...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1675 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | 1680 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); |
1676 } | 1681 } |
1677 Label ok; | 1682 Label ok; |
1678 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1683 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1679 switch (length) { | 1684 switch (length) { |
1680 case 2: { | 1685 case 2: { |
1681 if (ShortCutEmitCharacterPair(macro_assembler, | 1686 if (ShortCutEmitCharacterPair(macro_assembler, |
1682 chars[0], | 1687 chars[0], |
1683 chars[1], | 1688 chars[1], |
1684 on_failure)) { | 1689 on_failure)) { |
1685 ok.Unuse(); | |
1686 } else { | 1690 } else { |
1687 macro_assembler->CheckCharacter(chars[0], &ok); | 1691 macro_assembler->CheckCharacter(chars[0], &ok); |
1688 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1692 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
1689 macro_assembler->Bind(&ok); | 1693 macro_assembler->Bind(&ok); |
1690 } | 1694 } |
1691 break; | 1695 break; |
1692 } | 1696 } |
1693 case 4: | 1697 case 4: |
1694 macro_assembler->CheckCharacter(chars[3], &ok); | 1698 macro_assembler->CheckCharacter(chars[3], &ok); |
1695 // Fall through! | 1699 // Fall through! |
1696 case 3: | 1700 case 3: |
1697 macro_assembler->CheckCharacter(chars[0], &ok); | 1701 macro_assembler->CheckCharacter(chars[0], &ok); |
1698 macro_assembler->CheckCharacter(chars[1], &ok); | 1702 macro_assembler->CheckCharacter(chars[1], &ok); |
1699 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 1703 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
1700 macro_assembler->Bind(&ok); | 1704 macro_assembler->Bind(&ok); |
1701 break; | 1705 break; |
1702 default: | 1706 default: |
1703 UNREACHABLE(); | 1707 UNREACHABLE(); |
1704 break; | 1708 break; |
1705 } | 1709 } |
1706 } | 1710 } |
1707 } | 1711 } |
1708 | 1712 |
1709 | 1713 |
1710 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1714 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1711 RegExpCharacterClass* cc, | 1715 RegExpCharacterClass* cc, |
1712 int cp_offset, | 1716 int cp_offset, |
1713 Label* on_failure, | 1717 Label* on_failure, |
1714 bool check_offset) { | 1718 bool check_offset, |
1719 bool ascii) { | |
1715 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1720 ZoneList<CharacterRange>* ranges = cc->ranges(); |
1721 const int max_char = ascii ? 0x7f : 0xffff; | |
Lasse Reichstein
2008/12/09 07:43:07
Use String::kMaxAsciiCharCode instead of 0x7f?
| |
1716 | 1722 |
1717 Label success; | 1723 Label success; |
1718 | 1724 |
1719 Label* char_is_in_class = | 1725 Label* char_is_in_class = |
1720 cc->is_negated() ? on_failure : &success; | 1726 cc->is_negated() ? on_failure : &success; |
1721 | 1727 |
1722 int range_count = ranges->length(); | 1728 int range_count = ranges->length(); |
1723 | 1729 |
1724 if (range_count == 0) { | 1730 int last_valid_range = range_count - 1; |
1731 while (last_valid_range >= 0) { | |
1732 CharacterRange& range = ranges->at(last_valid_range); | |
1733 if (range.from() <= max_char) { | |
1734 break; | |
1735 } | |
1736 last_valid_range--; | |
1737 } | |
1738 | |
1739 if (last_valid_range < 0) { | |
1725 if (!cc->is_negated()) { | 1740 if (!cc->is_negated()) { |
1741 // TODO(plesner): We can remove this when the node level does our | |
1742 // ASCII optimizations for us. | |
1726 macro_assembler->GoTo(on_failure); | 1743 macro_assembler->GoTo(on_failure); |
1727 } | 1744 } |
1728 return; | 1745 return; |
1729 } | 1746 } |
1730 | 1747 |
1731 if (range_count == 1 && | 1748 if (last_valid_range == 0 && |
1732 !cc->is_negated() && | 1749 !cc->is_negated() && |
1733 ranges->at(0).IsEverything(0xffff)) { | 1750 ranges->at(0).IsEverything(max_char)) { |
1734 // This is a common case hit by non-anchored expressions. | 1751 // This is a common case hit by non-anchored expressions. |
1735 // TODO(erikcorry): We should have a macro assembler instruction that just | 1752 // TODO(erikcorry): We should have a macro assembler instruction that just |
1736 // checks for end of string without loading the character. | 1753 // checks for end of string without loading the character. |
1737 if (check_offset) { | 1754 if (check_offset) { |
1738 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1755 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); |
1739 } | 1756 } |
1740 return; | 1757 return; |
1741 } | 1758 } |
1742 | 1759 |
1743 if (check_offset) { | 1760 if (check_offset) { |
1744 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1761 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); |
1745 } else { | 1762 } else { |
1746 // Here we don't need to check against the end of the input string | 1763 // Here we don't need to check against the end of the input string |
1747 // since this character lies before a character that matched. | 1764 // since this character lies before a character that matched. |
1748 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); | 1765 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); |
1749 } | 1766 } |
1750 | 1767 |
1751 for (int i = 0; i < range_count - 1; i++) { | 1768 for (int i = 0; i <= last_valid_range; i++) { |
1752 CharacterRange& range = ranges->at(i); | 1769 CharacterRange& range = ranges->at(i); |
1753 Label next_range; | 1770 Label next_range; |
1754 uc16 from = range.from(); | 1771 uc16 from = range.from(); |
1755 uc16 to = range.to(); | 1772 uc16 to = range.to(); |
1773 if (from > max_char) { | |
1774 continue; | |
1775 } | |
1776 if (to > max_char) to = max_char; | |
1756 if (to == from) { | 1777 if (to == from) { |
1757 macro_assembler->CheckCharacter(to, char_is_in_class); | 1778 macro_assembler->CheckCharacter(to, char_is_in_class); |
1758 } else { | 1779 } else { |
1759 if (from != 0) { | 1780 if (from != 0) { |
1760 macro_assembler->CheckCharacterLT(from, &next_range); | 1781 macro_assembler->CheckCharacterLT(from, &next_range); |
Lasse Reichstein
2008/12/09 07:43:07
How about a CheckCharacterRange(from, to, char_is_
| |
1761 } | 1782 } |
1762 if (to != 0xffff) { | 1783 if (to != max_char) { |
1763 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); | 1784 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); |
1764 } else { | 1785 } else { |
1765 macro_assembler->GoTo(char_is_in_class); | 1786 macro_assembler->GoTo(char_is_in_class); |
1766 } | 1787 } |
1767 } | 1788 } |
1768 macro_assembler->Bind(&next_range); | 1789 macro_assembler->Bind(&next_range); |
1769 } | 1790 } |
1770 | 1791 |
1771 CharacterRange& range = ranges->at(range_count - 1); | 1792 CharacterRange& range = ranges->at(last_valid_range); |
1772 uc16 from = range.from(); | 1793 uc16 from = range.from(); |
1773 uc16 to = range.to(); | 1794 uc16 to = range.to(); |
1774 | 1795 |
1796 if (to > max_char) to = max_char; | |
1797 ASSERT(to >= from); | |
1798 | |
1775 if (to == from) { | 1799 if (to == from) { |
1776 if (cc->is_negated()) { | 1800 if (cc->is_negated()) { |
1777 macro_assembler->CheckCharacter(to, on_failure); | 1801 macro_assembler->CheckCharacter(to, on_failure); |
1778 } else { | 1802 } else { |
1779 macro_assembler->CheckNotCharacter(to, on_failure); | 1803 macro_assembler->CheckNotCharacter(to, on_failure); |
1780 } | 1804 } |
1781 } else { | 1805 } else { |
1782 if (from != 0) { | 1806 if (from != 0) { |
1783 if (cc->is_negated()) { | 1807 if (cc->is_negated()) { |
1784 macro_assembler->CheckCharacterLT(from, &success); | 1808 macro_assembler->CheckCharacterLT(from, &success); |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1868 if (limit_result == FAIL) return false; | 1892 if (limit_result == FAIL) return false; |
1869 if (limit_result == DONE) return true; | 1893 if (limit_result == DONE) return true; |
1870 ASSERT(limit_result == CONTINUE); | 1894 ASSERT(limit_result == CONTINUE); |
1871 | 1895 |
1872 int element_count = elms_->length(); | 1896 int element_count = elms_->length(); |
1873 ASSERT(element_count != 0); | 1897 ASSERT(element_count != 0); |
1874 if (info()->at_end) { | 1898 if (info()->at_end) { |
1875 macro_assembler->GoTo(backtrack); | 1899 macro_assembler->GoTo(backtrack); |
1876 return true; | 1900 return true; |
1877 } | 1901 } |
1878 // First, handle straight character matches. | 1902 // First check for non-ASCII text. |
1903 // TODO(plesner): We should do this at node level. | |
1904 if (compiler->ascii()) { | |
1905 for (int i = element_count - 1; i >= 0; i--) { | |
1906 TextElement elm = elms_->at(i); | |
1907 if (elm.type == TextElement::ATOM) { | |
1908 Vector<const uc16> quarks = elm.data.u_atom->data(); | |
1909 for (int j = quarks.length() - 1; j >= 0; j--) { | |
1910 if (quarks[j] > 0x7f) { | |
Lasse Reichstein
2008/12/09 07:43:07
Use String::kMaxAsciiCharCode
| |
1911 macro_assembler->GoTo(backtrack); | |
1912 return true; | |
1913 } | |
1914 } | |
1915 } else { | |
1916 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | |
1917 } | |
1918 } | |
1919 } | |
1920 // Second, handle straight character matches. | |
1879 int checked_up_to = -1; | 1921 int checked_up_to = -1; |
1880 for (int i = element_count - 1; i >= 0; i--) { | 1922 for (int i = element_count - 1; i >= 0; i--) { |
1881 TextElement elm = elms_->at(i); | 1923 TextElement elm = elms_->at(i); |
1882 ASSERT(elm.cp_offset >= 0); | 1924 ASSERT(elm.cp_offset >= 0); |
1883 int cp_offset = variant->cp_offset() + elm.cp_offset; | 1925 int cp_offset = variant->cp_offset() + elm.cp_offset; |
1884 if (elm.type == TextElement::ATOM) { | 1926 if (elm.type == TextElement::ATOM) { |
1885 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1927 Vector<const uc16> quarks = elm.data.u_atom->data(); |
1886 int last_cp_offset = cp_offset + quarks.length(); | 1928 int last_cp_offset = cp_offset + quarks.length(); |
1887 if (compiler->ignore_case()) { | 1929 if (compiler->ignore_case()) { |
1888 EmitAtomNonLetters(macro_assembler, | 1930 EmitAtomNonLetters(macro_assembler, |
1889 elm, | 1931 elm, |
1890 quarks, | 1932 quarks, |
1891 backtrack, | 1933 backtrack, |
1892 cp_offset, | 1934 cp_offset, |
1893 checked_up_to < last_cp_offset); | 1935 checked_up_to < last_cp_offset); |
1894 } else { | 1936 } else { |
1895 macro_assembler->CheckCharacters(quarks, | 1937 macro_assembler->CheckCharacters(quarks, |
1896 cp_offset, | 1938 cp_offset, |
1897 backtrack, | 1939 backtrack, |
1898 checked_up_to < last_cp_offset); | 1940 checked_up_to < last_cp_offset); |
1899 } | 1941 } |
1900 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 1942 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
1901 } else { | 1943 } else { |
1902 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 1944 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
1903 } | 1945 } |
1904 } | 1946 } |
1905 // Second, handle case independent letter matches if any. | 1947 // Third, handle case independent letter matches if any. |
1906 if (compiler->ignore_case()) { | 1948 if (compiler->ignore_case()) { |
1907 for (int i = element_count - 1; i >= 0; i--) { | 1949 for (int i = element_count - 1; i >= 0; i--) { |
1908 TextElement elm = elms_->at(i); | 1950 TextElement elm = elms_->at(i); |
1909 int cp_offset = variant->cp_offset() + elm.cp_offset; | 1951 int cp_offset = variant->cp_offset() + elm.cp_offset; |
1910 if (elm.type == TextElement::ATOM) { | 1952 if (elm.type == TextElement::ATOM) { |
1911 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1953 Vector<const uc16> quarks = elm.data.u_atom->data(); |
1912 int last_cp_offset = cp_offset + quarks.length(); | 1954 int last_cp_offset = cp_offset + quarks.length(); |
1913 EmitAtomLetters(macro_assembler, | 1955 EmitAtomLetters(macro_assembler, |
1914 elm, | 1956 elm, |
1915 quarks, | 1957 quarks, |
1916 backtrack, | 1958 backtrack, |
1917 cp_offset, | 1959 cp_offset, |
1918 checked_up_to < last_cp_offset); | 1960 checked_up_to < last_cp_offset); |
1919 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 1961 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
1920 } | 1962 } |
1921 } | 1963 } |
1922 } | 1964 } |
1923 // If the fast character matches passed then do the character classes. | 1965 // If the fast character matches passed then do the character classes. |
1924 for (int i = element_count - 1; i >= 0; i--) { | 1966 for (int i = element_count - 1; i >= 0; i--) { |
1925 TextElement elm = elms_->at(i); | 1967 TextElement elm = elms_->at(i); |
1926 int cp_offset = variant->cp_offset() + elm.cp_offset; | 1968 int cp_offset = variant->cp_offset() + elm.cp_offset; |
1927 if (elm.type == TextElement::CHAR_CLASS) { | 1969 if (elm.type == TextElement::CHAR_CLASS) { |
1928 RegExpCharacterClass* cc = elm.data.u_char_class; | 1970 RegExpCharacterClass* cc = elm.data.u_char_class; |
1929 EmitCharClass(macro_assembler, | 1971 EmitCharClass(macro_assembler, |
1930 cc, | 1972 cc, |
1931 cp_offset, | 1973 cp_offset, |
1932 backtrack, | 1974 backtrack, |
1933 checked_up_to < cp_offset); | 1975 checked_up_to < cp_offset, |
1976 compiler->ascii()); | |
1934 if (cp_offset > checked_up_to) checked_up_to = cp_offset; | 1977 if (cp_offset > checked_up_to) checked_up_to = cp_offset; |
1935 } | 1978 } |
1936 } | 1979 } |
1937 | 1980 |
1938 GenerationVariant new_variant(*variant); | 1981 GenerationVariant new_variant(*variant); |
1939 new_variant.set_cp_offset(checked_up_to + 1); | 1982 new_variant.set_cp_offset(checked_up_to + 1); |
1940 RecursionCheck rc(compiler); | 1983 RecursionCheck rc(compiler); |
1941 return on_success()->Emit(compiler, &new_variant); | 1984 return on_success()->Emit(compiler, &new_variant); |
1942 } | 1985 } |
1943 | 1986 |
(...skipping 1660 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3604 target->Accept(this); | 3647 target->Accept(this); |
3605 } | 3648 } |
3606 | 3649 |
3607 | 3650 |
3608 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, | 3651 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
3609 RegExpNode** node_return, | 3652 RegExpNode** node_return, |
3610 bool ignore_case, | 3653 bool ignore_case, |
3611 bool is_multiline, | 3654 bool is_multiline, |
3612 Handle<String> pattern, | 3655 Handle<String> pattern, |
3613 bool is_ascii) { | 3656 bool is_ascii) { |
3614 RegExpCompiler compiler(input->capture_count, ignore_case); | 3657 RegExpCompiler compiler(input->capture_count, ignore_case, is_ascii); |
3615 // Wrap the body of the regexp in capture #0. | 3658 // Wrap the body of the regexp in capture #0. |
3616 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 3659 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
3617 0, | 3660 0, |
3618 &compiler, | 3661 &compiler, |
3619 compiler.accept()); | 3662 compiler.accept()); |
3620 // Add a .*? at the beginning, outside the body capture. | 3663 // Add a .*? at the beginning, outside the body capture. |
3621 // Note: We could choose to not add this if the regexp is anchored at | 3664 // Note: We could choose to not add this if the regexp is anchored at |
3622 // the start of the input but I'm not sure how best to do that and | 3665 // the start of the input but I'm not sure how best to do that and |
3623 // since we don't even handle ^ yet I'm saving that optimization for | 3666 // since we don't even handle ^ yet I'm saving that optimization for |
3624 // later. | 3667 // later. |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3660 EmbeddedVector<byte, 1024> codes; | 3703 EmbeddedVector<byte, 1024> codes; |
3661 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 3704 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
3662 return compiler.Assemble(&macro_assembler, | 3705 return compiler.Assemble(&macro_assembler, |
3663 node, | 3706 node, |
3664 input->capture_count, | 3707 input->capture_count, |
3665 pattern); | 3708 pattern); |
3666 } | 3709 } |
3667 | 3710 |
3668 | 3711 |
3669 }} // namespace v8::internal | 3712 }} // namespace v8::internal |
OLD | NEW |