Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(477)

Side by Side Diff: src/jsregexp.cc

Issue 13247: * Have an ASCII and a UC16 interpreter for Irregexp bytecodes -... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/interpreter-irregexp.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 944 matching lines...) Expand 10 before | Expand all | Expand 10 after
955 rc = false; 955 rc = false;
956 break; 956 break;
957 #endif 957 #endif
958 } 958 }
959 case RegExpMacroAssembler::kBytecodeImplementation: { 959 case RegExpMacroAssembler::kBytecodeImplementation: {
960 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { 960 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
961 offsets_vector[i] = -1; 961 offsets_vector[i] = -1;
962 } 962 }
963 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); 963 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
964 964
965 Handle<String> two_byte_subject = CachedStringToTwoByte(subject); 965 if (!subject->IsFlat(StringShape(*subject))) {
966 FlattenString(subject);
Lasse Reichstein 2008/12/09 07:43:07 The string is also flattened in the IA32 branch (l… [comment truncated in collapsed view]
967 }
966 968
967 rc = IrregexpInterpreter::Match(byte_codes, 969 rc = IrregexpInterpreter::Match(byte_codes,
968 two_byte_subject, 970 subject,
969 offsets_vector, 971 offsets_vector,
970 previous_index); 972 previous_index);
971 break; 973 break;
972 } 974 }
973 case RegExpMacroAssembler::kARMImplementation: 975 case RegExpMacroAssembler::kARMImplementation:
974 default: 976 default:
975 UNREACHABLE(); 977 UNREACHABLE();
976 rc = false; 978 rc = false;
977 break; 979 break;
978 } 980 }
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
1184 table_ = new DispatchTable(); 1186 table_ = new DispatchTable();
1185 DispatchTableConstructor cons(table_, ignore_case); 1187 DispatchTableConstructor cons(table_, ignore_case);
1186 cons.BuildTable(this); 1188 cons.BuildTable(this);
1187 } 1189 }
1188 return table_; 1190 return table_;
1189 } 1191 }
1190 1192
1191 1193
1192 class RegExpCompiler { 1194 class RegExpCompiler {
1193 public: 1195 public:
1194 RegExpCompiler(int capture_count, bool ignore_case); 1196 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);
1195 1197
1196 int AllocateRegister() { return next_register_++; } 1198 int AllocateRegister() { return next_register_++; }
1197 1199
1198 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 1200 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
1199 RegExpNode* start, 1201 RegExpNode* start,
1200 int capture_count, 1202 int capture_count,
1201 Handle<String> pattern); 1203 Handle<String> pattern);
1202 1204
1203 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 1205 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
1204 1206
1205 static const int kImplementationOffset = 0; 1207 static const int kImplementationOffset = 0;
1206 static const int kNumberOfRegistersOffset = 0; 1208 static const int kNumberOfRegistersOffset = 0;
1207 static const int kCodeOffset = 1; 1209 static const int kCodeOffset = 1;
1208 1210
1209 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 1211 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
1210 EndNode* accept() { return accept_; } 1212 EndNode* accept() { return accept_; }
1211 1213
1212 static const int kMaxRecursion = 100; 1214 static const int kMaxRecursion = 100;
1213 inline int recursion_depth() { return recursion_depth_; } 1215 inline int recursion_depth() { return recursion_depth_; }
1214 inline void IncrementRecursionDepth() { recursion_depth_++; } 1216 inline void IncrementRecursionDepth() { recursion_depth_++; }
1215 inline void DecrementRecursionDepth() { recursion_depth_--; } 1217 inline void DecrementRecursionDepth() { recursion_depth_--; }
1216 1218
1217 inline bool ignore_case() { return ignore_case_; } 1219 inline bool ignore_case() { return ignore_case_; }
1220 inline bool ascii() { return ascii_; }
1218 1221
1219 private: 1222 private:
1220 EndNode* accept_; 1223 EndNode* accept_;
1221 int next_register_; 1224 int next_register_;
1222 List<RegExpNode*>* work_list_; 1225 List<RegExpNode*>* work_list_;
1223 int recursion_depth_; 1226 int recursion_depth_;
1224 RegExpMacroAssembler* macro_assembler_; 1227 RegExpMacroAssembler* macro_assembler_;
1225 bool ignore_case_; 1228 bool ignore_case_;
1229 bool ascii_;
1226 }; 1230 };
1227 1231
1228 1232
1229 class RecursionCheck { 1233 class RecursionCheck {
1230 public: 1234 public:
1231 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1235 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1232 compiler->IncrementRecursionDepth(); 1236 compiler->IncrementRecursionDepth();
1233 } 1237 }
1234 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 1238 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
1235 private: 1239 private:
1236 RegExpCompiler* compiler_; 1240 RegExpCompiler* compiler_;
1237 }; 1241 };
1238 1242
1239 1243
1240 // Attempts to compile the regexp using an Irregexp code generator. Returns 1244 // Attempts to compile the regexp using an Irregexp code generator. Returns
1241 // a fixed array or a null handle depending on whether it succeeded. 1245 // a fixed array or a null handle depending on whether it succeeded.
1242 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) 1246 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
1243 : next_register_(2 * (capture_count + 1)), 1247 : next_register_(2 * (capture_count + 1)),
1244 work_list_(NULL), 1248 work_list_(NULL),
1245 recursion_depth_(0), 1249 recursion_depth_(0),
1246 ignore_case_(ignore_case) { 1250 ignore_case_(ignore_case),
1251 ascii_(ascii) {
1247 accept_ = new EndNode(EndNode::ACCEPT); 1252 accept_ = new EndNode(EndNode::ACCEPT);
1248 } 1253 }
1249 1254
1250 1255
1251 Handle<FixedArray> RegExpCompiler::Assemble( 1256 Handle<FixedArray> RegExpCompiler::Assemble(
1252 RegExpMacroAssembler* macro_assembler, 1257 RegExpMacroAssembler* macro_assembler,
1253 RegExpNode* start, 1258 RegExpNode* start,
1254 int capture_count, 1259 int capture_count,
1255 Handle<String> pattern) { 1260 Handle<String> pattern) {
1256 #ifdef DEBUG 1261 #ifdef DEBUG
(...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after
1675 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); 1680 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
1676 } 1681 }
1677 Label ok; 1682 Label ok;
1678 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1683 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1679 switch (length) { 1684 switch (length) {
1680 case 2: { 1685 case 2: {
1681 if (ShortCutEmitCharacterPair(macro_assembler, 1686 if (ShortCutEmitCharacterPair(macro_assembler,
1682 chars[0], 1687 chars[0],
1683 chars[1], 1688 chars[1],
1684 on_failure)) { 1689 on_failure)) {
1685 ok.Unuse();
1686 } else { 1690 } else {
1687 macro_assembler->CheckCharacter(chars[0], &ok); 1691 macro_assembler->CheckCharacter(chars[0], &ok);
1688 macro_assembler->CheckNotCharacter(chars[1], on_failure); 1692 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1689 macro_assembler->Bind(&ok); 1693 macro_assembler->Bind(&ok);
1690 } 1694 }
1691 break; 1695 break;
1692 } 1696 }
1693 case 4: 1697 case 4:
1694 macro_assembler->CheckCharacter(chars[3], &ok); 1698 macro_assembler->CheckCharacter(chars[3], &ok);
1695 // Fall through! 1699 // Fall through!
1696 case 3: 1700 case 3:
1697 macro_assembler->CheckCharacter(chars[0], &ok); 1701 macro_assembler->CheckCharacter(chars[0], &ok);
1698 macro_assembler->CheckCharacter(chars[1], &ok); 1702 macro_assembler->CheckCharacter(chars[1], &ok);
1699 macro_assembler->CheckNotCharacter(chars[2], on_failure); 1703 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1700 macro_assembler->Bind(&ok); 1704 macro_assembler->Bind(&ok);
1701 break; 1705 break;
1702 default: 1706 default:
1703 UNREACHABLE(); 1707 UNREACHABLE();
1704 break; 1708 break;
1705 } 1709 }
1706 } 1710 }
1707 } 1711 }
1708 1712
1709 1713
1710 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1714 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1711 RegExpCharacterClass* cc, 1715 RegExpCharacterClass* cc,
1712 int cp_offset, 1716 int cp_offset,
1713 Label* on_failure, 1717 Label* on_failure,
1714 bool check_offset) { 1718 bool check_offset,
1719 bool ascii) {
1715 ZoneList<CharacterRange>* ranges = cc->ranges(); 1720 ZoneList<CharacterRange>* ranges = cc->ranges();
1721 const int max_char = ascii ? 0x7f : 0xffff;
Lasse Reichstein 2008/12/09 07:43:07 Use String::kMaxAsciiCharCode instead of 0x7f?
1716 1722
1717 Label success; 1723 Label success;
1718 1724
1719 Label* char_is_in_class = 1725 Label* char_is_in_class =
1720 cc->is_negated() ? on_failure : &success; 1726 cc->is_negated() ? on_failure : &success;
1721 1727
1722 int range_count = ranges->length(); 1728 int range_count = ranges->length();
1723 1729
1724 if (range_count == 0) { 1730 int last_valid_range = range_count - 1;
1731 while (last_valid_range >= 0) {
1732 CharacterRange& range = ranges->at(last_valid_range);
1733 if (range.from() <= max_char) {
1734 break;
1735 }
1736 last_valid_range--;
1737 }
1738
1739 if (last_valid_range < 0) {
1725 if (!cc->is_negated()) { 1740 if (!cc->is_negated()) {
1741 // TODO(plesner): We can remove this when the node level does our
1742 // ASCII optimizations for us.
1726 macro_assembler->GoTo(on_failure); 1743 macro_assembler->GoTo(on_failure);
1727 } 1744 }
1728 return; 1745 return;
1729 } 1746 }
1730 1747
1731 if (range_count == 1 && 1748 if (last_valid_range == 0 &&
1732 !cc->is_negated() && 1749 !cc->is_negated() &&
1733 ranges->at(0).IsEverything(0xffff)) { 1750 ranges->at(0).IsEverything(max_char)) {
1734 // This is a common case hit by non-anchored expressions. 1751 // This is a common case hit by non-anchored expressions.
1735 // TODO(erikcorry): We should have a macro assembler instruction that just 1752 // TODO(erikcorry): We should have a macro assembler instruction that just
1736 // checks for end of string without loading the character. 1753 // checks for end of string without loading the character.
1737 if (check_offset) { 1754 if (check_offset) {
1738 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); 1755 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
1739 } 1756 }
1740 return; 1757 return;
1741 } 1758 }
1742 1759
1743 if (check_offset) { 1760 if (check_offset) {
1744 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); 1761 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
1745 } else { 1762 } else {
1746 // Here we don't need to check against the end of the input string 1763 // Here we don't need to check against the end of the input string
1747 // since this character lies before a character that matched. 1764 // since this character lies before a character that matched.
1748 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); 1765 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset);
1749 } 1766 }
1750 1767
1751 for (int i = 0; i < range_count - 1; i++) { 1768 for (int i = 0; i <= last_valid_range; i++) {
1752 CharacterRange& range = ranges->at(i); 1769 CharacterRange& range = ranges->at(i);
1753 Label next_range; 1770 Label next_range;
1754 uc16 from = range.from(); 1771 uc16 from = range.from();
1755 uc16 to = range.to(); 1772 uc16 to = range.to();
1773 if (from > max_char) {
1774 continue;
1775 }
1776 if (to > max_char) to = max_char;
1756 if (to == from) { 1777 if (to == from) {
1757 macro_assembler->CheckCharacter(to, char_is_in_class); 1778 macro_assembler->CheckCharacter(to, char_is_in_class);
1758 } else { 1779 } else {
1759 if (from != 0) { 1780 if (from != 0) {
1760 macro_assembler->CheckCharacterLT(from, &next_range); 1781 macro_assembler->CheckCharacterLT(from, &next_range);
Lasse Reichstein 2008/12/09 07:43:07 How about a CheckCharacterRange(from, to, char_is_… [comment truncated in collapsed view]
1761 } 1782 }
1762 if (to != 0xffff) { 1783 if (to != max_char) {
1763 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); 1784 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
1764 } else { 1785 } else {
1765 macro_assembler->GoTo(char_is_in_class); 1786 macro_assembler->GoTo(char_is_in_class);
1766 } 1787 }
1767 } 1788 }
1768 macro_assembler->Bind(&next_range); 1789 macro_assembler->Bind(&next_range);
1769 } 1790 }
1770 1791
1771 CharacterRange& range = ranges->at(range_count - 1); 1792 CharacterRange& range = ranges->at(last_valid_range);
1772 uc16 from = range.from(); 1793 uc16 from = range.from();
1773 uc16 to = range.to(); 1794 uc16 to = range.to();
1774 1795
1796 if (to > max_char) to = max_char;
1797 ASSERT(to >= from);
1798
1775 if (to == from) { 1799 if (to == from) {
1776 if (cc->is_negated()) { 1800 if (cc->is_negated()) {
1777 macro_assembler->CheckCharacter(to, on_failure); 1801 macro_assembler->CheckCharacter(to, on_failure);
1778 } else { 1802 } else {
1779 macro_assembler->CheckNotCharacter(to, on_failure); 1803 macro_assembler->CheckNotCharacter(to, on_failure);
1780 } 1804 }
1781 } else { 1805 } else {
1782 if (from != 0) { 1806 if (from != 0) {
1783 if (cc->is_negated()) { 1807 if (cc->is_negated()) {
1784 macro_assembler->CheckCharacterLT(from, &success); 1808 macro_assembler->CheckCharacterLT(from, &success);
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
1868 if (limit_result == FAIL) return false; 1892 if (limit_result == FAIL) return false;
1869 if (limit_result == DONE) return true; 1893 if (limit_result == DONE) return true;
1870 ASSERT(limit_result == CONTINUE); 1894 ASSERT(limit_result == CONTINUE);
1871 1895
1872 int element_count = elms_->length(); 1896 int element_count = elms_->length();
1873 ASSERT(element_count != 0); 1897 ASSERT(element_count != 0);
1874 if (info()->at_end) { 1898 if (info()->at_end) {
1875 macro_assembler->GoTo(backtrack); 1899 macro_assembler->GoTo(backtrack);
1876 return true; 1900 return true;
1877 } 1901 }
1878 // First, handle straight character matches. 1902 // First check for non-ASCII text.
1903 // TODO(plesner): We should do this at node level.
1904 if (compiler->ascii()) {
1905 for (int i = element_count - 1; i >= 0; i--) {
1906 TextElement elm = elms_->at(i);
1907 if (elm.type == TextElement::ATOM) {
1908 Vector<const uc16> quarks = elm.data.u_atom->data();
1909 for (int j = quarks.length() - 1; j >= 0; j--) {
1910 if (quarks[j] > 0x7f) {
Lasse Reichstein 2008/12/09 07:43:07 Use String::kMaxAsciiCharCode
1911 macro_assembler->GoTo(backtrack);
1912 return true;
1913 }
1914 }
1915 } else {
1916 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
1917 }
1918 }
1919 }
1920 // Second, handle straight character matches.
1879 int checked_up_to = -1; 1921 int checked_up_to = -1;
1880 for (int i = element_count - 1; i >= 0; i--) { 1922 for (int i = element_count - 1; i >= 0; i--) {
1881 TextElement elm = elms_->at(i); 1923 TextElement elm = elms_->at(i);
1882 ASSERT(elm.cp_offset >= 0); 1924 ASSERT(elm.cp_offset >= 0);
1883 int cp_offset = variant->cp_offset() + elm.cp_offset; 1925 int cp_offset = variant->cp_offset() + elm.cp_offset;
1884 if (elm.type == TextElement::ATOM) { 1926 if (elm.type == TextElement::ATOM) {
1885 Vector<const uc16> quarks = elm.data.u_atom->data(); 1927 Vector<const uc16> quarks = elm.data.u_atom->data();
1886 int last_cp_offset = cp_offset + quarks.length(); 1928 int last_cp_offset = cp_offset + quarks.length();
1887 if (compiler->ignore_case()) { 1929 if (compiler->ignore_case()) {
1888 EmitAtomNonLetters(macro_assembler, 1930 EmitAtomNonLetters(macro_assembler,
1889 elm, 1931 elm,
1890 quarks, 1932 quarks,
1891 backtrack, 1933 backtrack,
1892 cp_offset, 1934 cp_offset,
1893 checked_up_to < last_cp_offset); 1935 checked_up_to < last_cp_offset);
1894 } else { 1936 } else {
1895 macro_assembler->CheckCharacters(quarks, 1937 macro_assembler->CheckCharacters(quarks,
1896 cp_offset, 1938 cp_offset,
1897 backtrack, 1939 backtrack,
1898 checked_up_to < last_cp_offset); 1940 checked_up_to < last_cp_offset);
1899 } 1941 }
1900 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; 1942 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
1901 } else { 1943 } else {
1902 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 1944 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
1903 } 1945 }
1904 } 1946 }
1905 // Second, handle case independent letter matches if any. 1947 // Third, handle case independent letter matches if any.
1906 if (compiler->ignore_case()) { 1948 if (compiler->ignore_case()) {
1907 for (int i = element_count - 1; i >= 0; i--) { 1949 for (int i = element_count - 1; i >= 0; i--) {
1908 TextElement elm = elms_->at(i); 1950 TextElement elm = elms_->at(i);
1909 int cp_offset = variant->cp_offset() + elm.cp_offset; 1951 int cp_offset = variant->cp_offset() + elm.cp_offset;
1910 if (elm.type == TextElement::ATOM) { 1952 if (elm.type == TextElement::ATOM) {
1911 Vector<const uc16> quarks = elm.data.u_atom->data(); 1953 Vector<const uc16> quarks = elm.data.u_atom->data();
1912 int last_cp_offset = cp_offset + quarks.length(); 1954 int last_cp_offset = cp_offset + quarks.length();
1913 EmitAtomLetters(macro_assembler, 1955 EmitAtomLetters(macro_assembler,
1914 elm, 1956 elm,
1915 quarks, 1957 quarks,
1916 backtrack, 1958 backtrack,
1917 cp_offset, 1959 cp_offset,
1918 checked_up_to < last_cp_offset); 1960 checked_up_to < last_cp_offset);
1919 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; 1961 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
1920 } 1962 }
1921 } 1963 }
1922 } 1964 }
1923 // If the fast character matches passed then do the character classes. 1965 // If the fast character matches passed then do the character classes.
1924 for (int i = element_count - 1; i >= 0; i--) { 1966 for (int i = element_count - 1; i >= 0; i--) {
1925 TextElement elm = elms_->at(i); 1967 TextElement elm = elms_->at(i);
1926 int cp_offset = variant->cp_offset() + elm.cp_offset; 1968 int cp_offset = variant->cp_offset() + elm.cp_offset;
1927 if (elm.type == TextElement::CHAR_CLASS) { 1969 if (elm.type == TextElement::CHAR_CLASS) {
1928 RegExpCharacterClass* cc = elm.data.u_char_class; 1970 RegExpCharacterClass* cc = elm.data.u_char_class;
1929 EmitCharClass(macro_assembler, 1971 EmitCharClass(macro_assembler,
1930 cc, 1972 cc,
1931 cp_offset, 1973 cp_offset,
1932 backtrack, 1974 backtrack,
1933 checked_up_to < cp_offset); 1975 checked_up_to < cp_offset,
1976 compiler->ascii());
1934 if (cp_offset > checked_up_to) checked_up_to = cp_offset; 1977 if (cp_offset > checked_up_to) checked_up_to = cp_offset;
1935 } 1978 }
1936 } 1979 }
1937 1980
1938 GenerationVariant new_variant(*variant); 1981 GenerationVariant new_variant(*variant);
1939 new_variant.set_cp_offset(checked_up_to + 1); 1982 new_variant.set_cp_offset(checked_up_to + 1);
1940 RecursionCheck rc(compiler); 1983 RecursionCheck rc(compiler);
1941 return on_success()->Emit(compiler, &new_variant); 1984 return on_success()->Emit(compiler, &new_variant);
1942 } 1985 }
1943 1986
(...skipping 1660 matching lines...) Expand 10 before | Expand all | Expand 10 after
3604 target->Accept(this); 3647 target->Accept(this);
3605 } 3648 }
3606 3649
3607 3650
3608 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, 3651 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
3609 RegExpNode** node_return, 3652 RegExpNode** node_return,
3610 bool ignore_case, 3653 bool ignore_case,
3611 bool is_multiline, 3654 bool is_multiline,
3612 Handle<String> pattern, 3655 Handle<String> pattern,
3613 bool is_ascii) { 3656 bool is_ascii) {
3614 RegExpCompiler compiler(input->capture_count, ignore_case); 3657 RegExpCompiler compiler(input->capture_count, ignore_case, is_ascii);
3615 // Wrap the body of the regexp in capture #0. 3658 // Wrap the body of the regexp in capture #0.
3616 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, 3659 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
3617 0, 3660 0,
3618 &compiler, 3661 &compiler,
3619 compiler.accept()); 3662 compiler.accept());
3620 // Add a .*? at the beginning, outside the body capture. 3663 // Add a .*? at the beginning, outside the body capture.
3621 // Note: We could choose to not add this if the regexp is anchored at 3664 // Note: We could choose to not add this if the regexp is anchored at
3622 // the start of the input but I'm not sure how best to do that and 3665 // the start of the input but I'm not sure how best to do that and
3623 // since we don't even handle ^ yet I'm saving that optimization for 3666 // since we don't even handle ^ yet I'm saving that optimization for
3624 // later. 3667 // later.
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
3660 EmbeddedVector<byte, 1024> codes; 3703 EmbeddedVector<byte, 1024> codes;
3661 RegExpMacroAssemblerIrregexp macro_assembler(codes); 3704 RegExpMacroAssemblerIrregexp macro_assembler(codes);
3662 return compiler.Assemble(&macro_assembler, 3705 return compiler.Assemble(&macro_assembler,
3663 node, 3706 node,
3664 input->capture_count, 3707 input->capture_count,
3665 pattern); 3708 pattern);
3666 } 3709 }
3667 3710
3668 3711
3669 }} // namespace v8::internal 3712 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/interpreter-irregexp.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698