Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 944 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 955 rc = false; | 955 rc = false; |
| 956 break; | 956 break; |
| 957 #endif | 957 #endif |
| 958 } | 958 } |
| 959 case RegExpMacroAssembler::kBytecodeImplementation: { | 959 case RegExpMacroAssembler::kBytecodeImplementation: { |
| 960 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | 960 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
| 961 offsets_vector[i] = -1; | 961 offsets_vector[i] = -1; |
| 962 } | 962 } |
| 963 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); | 963 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); |
| 964 | 964 |
| 965 Handle<String> two_byte_subject = CachedStringToTwoByte(subject); | 965 if (!subject->IsFlat(StringShape(*subject))) { |
| 966 FlattenString(subject); | |
|
Lasse Reichstein
2008/12/09 07:43:07
The string is also flattened in the IA32 branch (l
| |
| 967 } | |
| 966 | 968 |
| 967 rc = IrregexpInterpreter::Match(byte_codes, | 969 rc = IrregexpInterpreter::Match(byte_codes, |
| 968 two_byte_subject, | 970 subject, |
| 969 offsets_vector, | 971 offsets_vector, |
| 970 previous_index); | 972 previous_index); |
| 971 break; | 973 break; |
| 972 } | 974 } |
| 973 case RegExpMacroAssembler::kARMImplementation: | 975 case RegExpMacroAssembler::kARMImplementation: |
| 974 default: | 976 default: |
| 975 UNREACHABLE(); | 977 UNREACHABLE(); |
| 976 rc = false; | 978 rc = false; |
| 977 break; | 979 break; |
| 978 } | 980 } |
| (...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1184 table_ = new DispatchTable(); | 1186 table_ = new DispatchTable(); |
| 1185 DispatchTableConstructor cons(table_, ignore_case); | 1187 DispatchTableConstructor cons(table_, ignore_case); |
| 1186 cons.BuildTable(this); | 1188 cons.BuildTable(this); |
| 1187 } | 1189 } |
| 1188 return table_; | 1190 return table_; |
| 1189 } | 1191 } |
| 1190 | 1192 |
| 1191 | 1193 |
| 1192 class RegExpCompiler { | 1194 class RegExpCompiler { |
| 1193 public: | 1195 public: |
| 1194 RegExpCompiler(int capture_count, bool ignore_case); | 1196 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); |
| 1195 | 1197 |
| 1196 int AllocateRegister() { return next_register_++; } | 1198 int AllocateRegister() { return next_register_++; } |
| 1197 | 1199 |
| 1198 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 1200 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
| 1199 RegExpNode* start, | 1201 RegExpNode* start, |
| 1200 int capture_count, | 1202 int capture_count, |
| 1201 Handle<String> pattern); | 1203 Handle<String> pattern); |
| 1202 | 1204 |
| 1203 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 1205 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 1204 | 1206 |
| 1205 static const int kImplementationOffset = 0; | 1207 static const int kImplementationOffset = 0; |
| 1206 static const int kNumberOfRegistersOffset = 0; | 1208 static const int kNumberOfRegistersOffset = 0; |
| 1207 static const int kCodeOffset = 1; | 1209 static const int kCodeOffset = 1; |
| 1208 | 1210 |
| 1209 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 1211 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
| 1210 EndNode* accept() { return accept_; } | 1212 EndNode* accept() { return accept_; } |
| 1211 | 1213 |
| 1212 static const int kMaxRecursion = 100; | 1214 static const int kMaxRecursion = 100; |
| 1213 inline int recursion_depth() { return recursion_depth_; } | 1215 inline int recursion_depth() { return recursion_depth_; } |
| 1214 inline void IncrementRecursionDepth() { recursion_depth_++; } | 1216 inline void IncrementRecursionDepth() { recursion_depth_++; } |
| 1215 inline void DecrementRecursionDepth() { recursion_depth_--; } | 1217 inline void DecrementRecursionDepth() { recursion_depth_--; } |
| 1216 | 1218 |
| 1217 inline bool ignore_case() { return ignore_case_; } | 1219 inline bool ignore_case() { return ignore_case_; } |
| 1220 inline bool ascii() { return ascii_; } | |
| 1218 | 1221 |
| 1219 private: | 1222 private: |
| 1220 EndNode* accept_; | 1223 EndNode* accept_; |
| 1221 int next_register_; | 1224 int next_register_; |
| 1222 List<RegExpNode*>* work_list_; | 1225 List<RegExpNode*>* work_list_; |
| 1223 int recursion_depth_; | 1226 int recursion_depth_; |
| 1224 RegExpMacroAssembler* macro_assembler_; | 1227 RegExpMacroAssembler* macro_assembler_; |
| 1225 bool ignore_case_; | 1228 bool ignore_case_; |
| 1229 bool ascii_; | |
| 1226 }; | 1230 }; |
| 1227 | 1231 |
| 1228 | 1232 |
| 1229 class RecursionCheck { | 1233 class RecursionCheck { |
| 1230 public: | 1234 public: |
| 1231 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 1235 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
| 1232 compiler->IncrementRecursionDepth(); | 1236 compiler->IncrementRecursionDepth(); |
| 1233 } | 1237 } |
| 1234 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 1238 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
| 1235 private: | 1239 private: |
| 1236 RegExpCompiler* compiler_; | 1240 RegExpCompiler* compiler_; |
| 1237 }; | 1241 }; |
| 1238 | 1242 |
| 1239 | 1243 |
| 1240 // Attempts to compile the regexp using an Irregexp code generator. Returns | 1244 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 1241 // a fixed array or a null handle depending on whether it succeeded. | 1245 // a fixed array or a null handle depending on whether it succeeded. |
| 1242 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) | 1246 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) |
| 1243 : next_register_(2 * (capture_count + 1)), | 1247 : next_register_(2 * (capture_count + 1)), |
| 1244 work_list_(NULL), | 1248 work_list_(NULL), |
| 1245 recursion_depth_(0), | 1249 recursion_depth_(0), |
| 1246 ignore_case_(ignore_case) { | 1250 ignore_case_(ignore_case), |
| 1251 ascii_(ascii) { | |
| 1247 accept_ = new EndNode(EndNode::ACCEPT); | 1252 accept_ = new EndNode(EndNode::ACCEPT); |
| 1248 } | 1253 } |
| 1249 | 1254 |
| 1250 | 1255 |
| 1251 Handle<FixedArray> RegExpCompiler::Assemble( | 1256 Handle<FixedArray> RegExpCompiler::Assemble( |
| 1252 RegExpMacroAssembler* macro_assembler, | 1257 RegExpMacroAssembler* macro_assembler, |
| 1253 RegExpNode* start, | 1258 RegExpNode* start, |
| 1254 int capture_count, | 1259 int capture_count, |
| 1255 Handle<String> pattern) { | 1260 Handle<String> pattern) { |
| 1256 #ifdef DEBUG | 1261 #ifdef DEBUG |
| (...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1675 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | 1680 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); |
| 1676 } | 1681 } |
| 1677 Label ok; | 1682 Label ok; |
| 1678 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1683 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
| 1679 switch (length) { | 1684 switch (length) { |
| 1680 case 2: { | 1685 case 2: { |
| 1681 if (ShortCutEmitCharacterPair(macro_assembler, | 1686 if (ShortCutEmitCharacterPair(macro_assembler, |
| 1682 chars[0], | 1687 chars[0], |
| 1683 chars[1], | 1688 chars[1], |
| 1684 on_failure)) { | 1689 on_failure)) { |
| 1685 ok.Unuse(); | |
| 1686 } else { | 1690 } else { |
| 1687 macro_assembler->CheckCharacter(chars[0], &ok); | 1691 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1688 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1692 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
| 1689 macro_assembler->Bind(&ok); | 1693 macro_assembler->Bind(&ok); |
| 1690 } | 1694 } |
| 1691 break; | 1695 break; |
| 1692 } | 1696 } |
| 1693 case 4: | 1697 case 4: |
| 1694 macro_assembler->CheckCharacter(chars[3], &ok); | 1698 macro_assembler->CheckCharacter(chars[3], &ok); |
| 1695 // Fall through! | 1699 // Fall through! |
| 1696 case 3: | 1700 case 3: |
| 1697 macro_assembler->CheckCharacter(chars[0], &ok); | 1701 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1698 macro_assembler->CheckCharacter(chars[1], &ok); | 1702 macro_assembler->CheckCharacter(chars[1], &ok); |
| 1699 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 1703 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
| 1700 macro_assembler->Bind(&ok); | 1704 macro_assembler->Bind(&ok); |
| 1701 break; | 1705 break; |
| 1702 default: | 1706 default: |
| 1703 UNREACHABLE(); | 1707 UNREACHABLE(); |
| 1704 break; | 1708 break; |
| 1705 } | 1709 } |
| 1706 } | 1710 } |
| 1707 } | 1711 } |
| 1708 | 1712 |
| 1709 | 1713 |
| 1710 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1714 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
| 1711 RegExpCharacterClass* cc, | 1715 RegExpCharacterClass* cc, |
| 1712 int cp_offset, | 1716 int cp_offset, |
| 1713 Label* on_failure, | 1717 Label* on_failure, |
| 1714 bool check_offset) { | 1718 bool check_offset, |
| 1719 bool ascii) { | |
| 1715 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1720 ZoneList<CharacterRange>* ranges = cc->ranges(); |
| 1721 const int max_char = ascii ? 0x7f : 0xffff; | |
|
Lasse Reichstein
2008/12/09 07:43:07
Use String::kMaxAsciiCharCode instead of 0x7f?
| |
| 1716 | 1722 |
| 1717 Label success; | 1723 Label success; |
| 1718 | 1724 |
| 1719 Label* char_is_in_class = | 1725 Label* char_is_in_class = |
| 1720 cc->is_negated() ? on_failure : &success; | 1726 cc->is_negated() ? on_failure : &success; |
| 1721 | 1727 |
| 1722 int range_count = ranges->length(); | 1728 int range_count = ranges->length(); |
| 1723 | 1729 |
| 1724 if (range_count == 0) { | 1730 int last_valid_range = range_count - 1; |
| 1731 while (last_valid_range >= 0) { | |
| 1732 CharacterRange& range = ranges->at(last_valid_range); | |
| 1733 if (range.from() <= max_char) { | |
| 1734 break; | |
| 1735 } | |
| 1736 last_valid_range--; | |
| 1737 } | |
| 1738 | |
| 1739 if (last_valid_range < 0) { | |
| 1725 if (!cc->is_negated()) { | 1740 if (!cc->is_negated()) { |
| 1741 // TODO(plesner): We can remove this when the node level does our | |
| 1742 // ASCII optimizations for us. | |
| 1726 macro_assembler->GoTo(on_failure); | 1743 macro_assembler->GoTo(on_failure); |
| 1727 } | 1744 } |
| 1728 return; | 1745 return; |
| 1729 } | 1746 } |
| 1730 | 1747 |
| 1731 if (range_count == 1 && | 1748 if (last_valid_range == 0 && |
| 1732 !cc->is_negated() && | 1749 !cc->is_negated() && |
| 1733 ranges->at(0).IsEverything(0xffff)) { | 1750 ranges->at(0).IsEverything(max_char)) { |
| 1734 // This is a common case hit by non-anchored expressions. | 1751 // This is a common case hit by non-anchored expressions. |
| 1735 // TODO(erikcorry): We should have a macro assembler instruction that just | 1752 // TODO(erikcorry): We should have a macro assembler instruction that just |
| 1736 // checks for end of string without loading the character. | 1753 // checks for end of string without loading the character. |
| 1737 if (check_offset) { | 1754 if (check_offset) { |
| 1738 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1755 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); |
| 1739 } | 1756 } |
| 1740 return; | 1757 return; |
| 1741 } | 1758 } |
| 1742 | 1759 |
| 1743 if (check_offset) { | 1760 if (check_offset) { |
| 1744 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1761 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); |
| 1745 } else { | 1762 } else { |
| 1746 // Here we don't need to check against the end of the input string | 1763 // Here we don't need to check against the end of the input string |
| 1747 // since this character lies before a character that matched. | 1764 // since this character lies before a character that matched. |
| 1748 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); | 1765 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); |
| 1749 } | 1766 } |
| 1750 | 1767 |
| 1751 for (int i = 0; i < range_count - 1; i++) { | 1768 for (int i = 0; i <= last_valid_range; i++) { |
| 1752 CharacterRange& range = ranges->at(i); | 1769 CharacterRange& range = ranges->at(i); |
| 1753 Label next_range; | 1770 Label next_range; |
| 1754 uc16 from = range.from(); | 1771 uc16 from = range.from(); |
| 1755 uc16 to = range.to(); | 1772 uc16 to = range.to(); |
| 1773 if (from > max_char) { | |
| 1774 continue; | |
| 1775 } | |
| 1776 if (to > max_char) to = max_char; | |
| 1756 if (to == from) { | 1777 if (to == from) { |
| 1757 macro_assembler->CheckCharacter(to, char_is_in_class); | 1778 macro_assembler->CheckCharacter(to, char_is_in_class); |
| 1758 } else { | 1779 } else { |
| 1759 if (from != 0) { | 1780 if (from != 0) { |
| 1760 macro_assembler->CheckCharacterLT(from, &next_range); | 1781 macro_assembler->CheckCharacterLT(from, &next_range); |
|
Lasse Reichstein
2008/12/09 07:43:07
How about a CheckCharacterRange(from, to, char_is_
| |
| 1761 } | 1782 } |
| 1762 if (to != 0xffff) { | 1783 if (to != max_char) { |
| 1763 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); | 1784 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); |
| 1764 } else { | 1785 } else { |
| 1765 macro_assembler->GoTo(char_is_in_class); | 1786 macro_assembler->GoTo(char_is_in_class); |
| 1766 } | 1787 } |
| 1767 } | 1788 } |
| 1768 macro_assembler->Bind(&next_range); | 1789 macro_assembler->Bind(&next_range); |
| 1769 } | 1790 } |
| 1770 | 1791 |
| 1771 CharacterRange& range = ranges->at(range_count - 1); | 1792 CharacterRange& range = ranges->at(last_valid_range); |
| 1772 uc16 from = range.from(); | 1793 uc16 from = range.from(); |
| 1773 uc16 to = range.to(); | 1794 uc16 to = range.to(); |
| 1774 | 1795 |
| 1796 if (to > max_char) to = max_char; | |
| 1797 ASSERT(to >= from); | |
| 1798 | |
| 1775 if (to == from) { | 1799 if (to == from) { |
| 1776 if (cc->is_negated()) { | 1800 if (cc->is_negated()) { |
| 1777 macro_assembler->CheckCharacter(to, on_failure); | 1801 macro_assembler->CheckCharacter(to, on_failure); |
| 1778 } else { | 1802 } else { |
| 1779 macro_assembler->CheckNotCharacter(to, on_failure); | 1803 macro_assembler->CheckNotCharacter(to, on_failure); |
| 1780 } | 1804 } |
| 1781 } else { | 1805 } else { |
| 1782 if (from != 0) { | 1806 if (from != 0) { |
| 1783 if (cc->is_negated()) { | 1807 if (cc->is_negated()) { |
| 1784 macro_assembler->CheckCharacterLT(from, &success); | 1808 macro_assembler->CheckCharacterLT(from, &success); |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1868 if (limit_result == FAIL) return false; | 1892 if (limit_result == FAIL) return false; |
| 1869 if (limit_result == DONE) return true; | 1893 if (limit_result == DONE) return true; |
| 1870 ASSERT(limit_result == CONTINUE); | 1894 ASSERT(limit_result == CONTINUE); |
| 1871 | 1895 |
| 1872 int element_count = elms_->length(); | 1896 int element_count = elms_->length(); |
| 1873 ASSERT(element_count != 0); | 1897 ASSERT(element_count != 0); |
| 1874 if (info()->at_end) { | 1898 if (info()->at_end) { |
| 1875 macro_assembler->GoTo(backtrack); | 1899 macro_assembler->GoTo(backtrack); |
| 1876 return true; | 1900 return true; |
| 1877 } | 1901 } |
| 1878 // First, handle straight character matches. | 1902 // First check for non-ASCII text. |
| 1903 // TODO(plesner): We should do this at node level. | |
| 1904 if (compiler->ascii()) { | |
| 1905 for (int i = element_count - 1; i >= 0; i--) { | |
| 1906 TextElement elm = elms_->at(i); | |
| 1907 if (elm.type == TextElement::ATOM) { | |
| 1908 Vector<const uc16> quarks = elm.data.u_atom->data(); | |
| 1909 for (int j = quarks.length() - 1; j >= 0; j--) { | |
| 1910 if (quarks[j] > 0x7f) { | |
|
Lasse Reichstein
2008/12/09 07:43:07
Use String::kMaxAsciiCharCode
| |
| 1911 macro_assembler->GoTo(backtrack); | |
| 1912 return true; | |
| 1913 } | |
| 1914 } | |
| 1915 } else { | |
| 1916 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | |
| 1917 } | |
| 1918 } | |
| 1919 } | |
| 1920 // Second, handle straight character matches. | |
| 1879 int checked_up_to = -1; | 1921 int checked_up_to = -1; |
| 1880 for (int i = element_count - 1; i >= 0; i--) { | 1922 for (int i = element_count - 1; i >= 0; i--) { |
| 1881 TextElement elm = elms_->at(i); | 1923 TextElement elm = elms_->at(i); |
| 1882 ASSERT(elm.cp_offset >= 0); | 1924 ASSERT(elm.cp_offset >= 0); |
| 1883 int cp_offset = variant->cp_offset() + elm.cp_offset; | 1925 int cp_offset = variant->cp_offset() + elm.cp_offset; |
| 1884 if (elm.type == TextElement::ATOM) { | 1926 if (elm.type == TextElement::ATOM) { |
| 1885 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1927 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1886 int last_cp_offset = cp_offset + quarks.length(); | 1928 int last_cp_offset = cp_offset + quarks.length(); |
| 1887 if (compiler->ignore_case()) { | 1929 if (compiler->ignore_case()) { |
| 1888 EmitAtomNonLetters(macro_assembler, | 1930 EmitAtomNonLetters(macro_assembler, |
| 1889 elm, | 1931 elm, |
| 1890 quarks, | 1932 quarks, |
| 1891 backtrack, | 1933 backtrack, |
| 1892 cp_offset, | 1934 cp_offset, |
| 1893 checked_up_to < last_cp_offset); | 1935 checked_up_to < last_cp_offset); |
| 1894 } else { | 1936 } else { |
| 1895 macro_assembler->CheckCharacters(quarks, | 1937 macro_assembler->CheckCharacters(quarks, |
| 1896 cp_offset, | 1938 cp_offset, |
| 1897 backtrack, | 1939 backtrack, |
| 1898 checked_up_to < last_cp_offset); | 1940 checked_up_to < last_cp_offset); |
| 1899 } | 1941 } |
| 1900 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 1942 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
| 1901 } else { | 1943 } else { |
| 1902 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 1944 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
| 1903 } | 1945 } |
| 1904 } | 1946 } |
| 1905 // Second, handle case independent letter matches if any. | 1947 // Third, handle case independent letter matches if any. |
| 1906 if (compiler->ignore_case()) { | 1948 if (compiler->ignore_case()) { |
| 1907 for (int i = element_count - 1; i >= 0; i--) { | 1949 for (int i = element_count - 1; i >= 0; i--) { |
| 1908 TextElement elm = elms_->at(i); | 1950 TextElement elm = elms_->at(i); |
| 1909 int cp_offset = variant->cp_offset() + elm.cp_offset; | 1951 int cp_offset = variant->cp_offset() + elm.cp_offset; |
| 1910 if (elm.type == TextElement::ATOM) { | 1952 if (elm.type == TextElement::ATOM) { |
| 1911 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1953 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1912 int last_cp_offset = cp_offset + quarks.length(); | 1954 int last_cp_offset = cp_offset + quarks.length(); |
| 1913 EmitAtomLetters(macro_assembler, | 1955 EmitAtomLetters(macro_assembler, |
| 1914 elm, | 1956 elm, |
| 1915 quarks, | 1957 quarks, |
| 1916 backtrack, | 1958 backtrack, |
| 1917 cp_offset, | 1959 cp_offset, |
| 1918 checked_up_to < last_cp_offset); | 1960 checked_up_to < last_cp_offset); |
| 1919 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 1961 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
| 1920 } | 1962 } |
| 1921 } | 1963 } |
| 1922 } | 1964 } |
| 1923 // If the fast character matches passed then do the character classes. | 1965 // If the fast character matches passed then do the character classes. |
| 1924 for (int i = element_count - 1; i >= 0; i--) { | 1966 for (int i = element_count - 1; i >= 0; i--) { |
| 1925 TextElement elm = elms_->at(i); | 1967 TextElement elm = elms_->at(i); |
| 1926 int cp_offset = variant->cp_offset() + elm.cp_offset; | 1968 int cp_offset = variant->cp_offset() + elm.cp_offset; |
| 1927 if (elm.type == TextElement::CHAR_CLASS) { | 1969 if (elm.type == TextElement::CHAR_CLASS) { |
| 1928 RegExpCharacterClass* cc = elm.data.u_char_class; | 1970 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 1929 EmitCharClass(macro_assembler, | 1971 EmitCharClass(macro_assembler, |
| 1930 cc, | 1972 cc, |
| 1931 cp_offset, | 1973 cp_offset, |
| 1932 backtrack, | 1974 backtrack, |
| 1933 checked_up_to < cp_offset); | 1975 checked_up_to < cp_offset, |
| 1976 compiler->ascii()); | |
| 1934 if (cp_offset > checked_up_to) checked_up_to = cp_offset; | 1977 if (cp_offset > checked_up_to) checked_up_to = cp_offset; |
| 1935 } | 1978 } |
| 1936 } | 1979 } |
| 1937 | 1980 |
| 1938 GenerationVariant new_variant(*variant); | 1981 GenerationVariant new_variant(*variant); |
| 1939 new_variant.set_cp_offset(checked_up_to + 1); | 1982 new_variant.set_cp_offset(checked_up_to + 1); |
| 1940 RecursionCheck rc(compiler); | 1983 RecursionCheck rc(compiler); |
| 1941 return on_success()->Emit(compiler, &new_variant); | 1984 return on_success()->Emit(compiler, &new_variant); |
| 1942 } | 1985 } |
| 1943 | 1986 |
| (...skipping 1660 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3604 target->Accept(this); | 3647 target->Accept(this); |
| 3605 } | 3648 } |
| 3606 | 3649 |
| 3607 | 3650 |
| 3608 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, | 3651 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| 3609 RegExpNode** node_return, | 3652 RegExpNode** node_return, |
| 3610 bool ignore_case, | 3653 bool ignore_case, |
| 3611 bool is_multiline, | 3654 bool is_multiline, |
| 3612 Handle<String> pattern, | 3655 Handle<String> pattern, |
| 3613 bool is_ascii) { | 3656 bool is_ascii) { |
| 3614 RegExpCompiler compiler(input->capture_count, ignore_case); | 3657 RegExpCompiler compiler(input->capture_count, ignore_case, is_ascii); |
| 3615 // Wrap the body of the regexp in capture #0. | 3658 // Wrap the body of the regexp in capture #0. |
| 3616 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 3659 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
| 3617 0, | 3660 0, |
| 3618 &compiler, | 3661 &compiler, |
| 3619 compiler.accept()); | 3662 compiler.accept()); |
| 3620 // Add a .*? at the beginning, outside the body capture. | 3663 // Add a .*? at the beginning, outside the body capture. |
| 3621 // Note: We could choose to not add this if the regexp is anchored at | 3664 // Note: We could choose to not add this if the regexp is anchored at |
| 3622 // the start of the input but I'm not sure how best to do that and | 3665 // the start of the input but I'm not sure how best to do that and |
| 3623 // since we don't even handle ^ yet I'm saving that optimization for | 3666 // since we don't even handle ^ yet I'm saving that optimization for |
| 3624 // later. | 3667 // later. |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3660 EmbeddedVector<byte, 1024> codes; | 3703 EmbeddedVector<byte, 1024> codes; |
| 3661 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 3704 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 3662 return compiler.Assemble(&macro_assembler, | 3705 return compiler.Assemble(&macro_assembler, |
| 3663 node, | 3706 node, |
| 3664 input->capture_count, | 3707 input->capture_count, |
| 3665 pattern); | 3708 pattern); |
| 3666 } | 3709 } |
| 3667 | 3710 |
| 3668 | 3711 |
| 3669 }} // namespace v8::internal | 3712 }} // namespace v8::internal |
| OLD | NEW |