Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(415)

Side by Side Diff: src/jsregexp.cc

Issue 10759: Introduce text nodes (Closed)
Patch Set: Fixed repeated RemoveLast issue Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/parser.cc » ('j') | src/parser.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 770 matching lines...) Expand 10 before | Expand all | Expand 10 after
781 Handle<ByteArray> RegExpImpl::Re2kCode(Handle<JSRegExp> re) { 781 Handle<ByteArray> RegExpImpl::Re2kCode(Handle<JSRegExp> re) {
782 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex)); 782 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
783 return Handle<ByteArray>(ByteArray::cast(value->get(kRe2kCodeIndex))); 783 return Handle<ByteArray>(ByteArray::cast(value->get(kRe2kCodeIndex)));
784 } 784 }
785 785
786 786
787 // ------------------------------------------------------------------- 787 // -------------------------------------------------------------------
788 // New regular expression engine 788 // New regular expression engine
789 789
790 790
791 void RegExpTree::AppendToText(RegExpText* text) {
792 UNREACHABLE();
793 }
794
795
796 void RegExpAtom::AppendToText(RegExpText* text) {
797 text->AddElement(TextElement::Atom(this));
798 }
799
800
801 void RegExpCharacterClass::AppendToText(RegExpText* text) {
802 text->AddElement(TextElement::CharClass(this));
803 }
804
805
806 void RegExpText::AppendToText(RegExpText* text) {
807 for (int i = 0; i < elements()->length(); i++)
808 text->AddElement(elements()->at(i));
809 }
810
811
812 TextElement TextElement::Atom(RegExpAtom* atom) {
813 TextElement result = TextElement(ATOM);
814 result.data.u_atom = atom;
815 return result;
816 }
817
818
819 TextElement TextElement::CharClass(
820 RegExpCharacterClass* char_class) {
821 TextElement result = TextElement(CHAR_CLASS);
822 result.data.u_char_class = char_class;
823 return result;
824 }
825
826
791 class RegExpCompiler { 827 class RegExpCompiler {
792 public: 828 public:
793 explicit RegExpCompiler(int capture_count); 829 explicit RegExpCompiler(int capture_count);
794 830
795 int AllocateRegister() { return next_register_++; } 831 int AllocateRegister() { return next_register_++; }
796 832
797 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 833 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
798 RegExpNode* start, 834 RegExpNode* start,
799 int capture_count, 835 int capture_count,
800 bool case_independent); 836 bool case_independent);
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after
1129 class TableEntryHeaderPrinter { 1165 class TableEntryHeaderPrinter {
1130 public: 1166 public:
1131 explicit TableEntryHeaderPrinter(StringStream* stream) 1167 explicit TableEntryHeaderPrinter(StringStream* stream)
1132 : first_(true), stream_(stream) { } 1168 : first_(true), stream_(stream) { }
1133 void Call(uc16 from, DispatchTable::Entry entry) { 1169 void Call(uc16 from, DispatchTable::Entry entry) {
1134 if (first_) { 1170 if (first_) {
1135 first_ = false; 1171 first_ = false;
1136 } else { 1172 } else {
1137 stream()->Add("|"); 1173 stream()->Add("|");
1138 } 1174 }
1139 stream()->Add("{%k-%k|{", from, from, entry.to()); 1175 stream()->Add("{\\%k-\\%k|{", from, entry.to());
1140 OutSet* out_set = entry.out_set(); 1176 OutSet* out_set = entry.out_set();
1141 int priority = 0; 1177 int priority = 0;
1142 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { 1178 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
1143 if (out_set->Get(i)) { 1179 if (out_set->Get(i)) {
1144 if (priority > 0) stream()->Add("|"); 1180 if (priority > 0) stream()->Add("|");
1145 stream()->Add("<s%io%i> %i", from, i, priority); 1181 stream()->Add("<s%io%i> %i", from, i, priority);
1146 priority++; 1182 priority++;
1147 } 1183 }
1148 } 1184 }
1149 stream()->Add("}}"); 1185 stream()->Add("}}");
(...skipping 13 matching lines...) Expand all
1163 TableEntryBodyPrinter body_printer(stream(), that); 1199 TableEntryBodyPrinter body_printer(stream(), that);
1164 that->table()->ForEach(&body_printer); 1200 that->table()->ForEach(&body_printer);
1165 PrintOnFailure(that, that->on_failure()); 1201 PrintOnFailure(that, that->on_failure());
1166 for (int i = 0; i < that->alternatives()->length(); i++) { 1202 for (int i = 0; i < that->alternatives()->length(); i++) {
1167 GuardedAlternative alt = that->alternatives()->at(i); 1203 GuardedAlternative alt = that->alternatives()->at(i);
1168 alt.node()->Accept(this); 1204 alt.node()->Accept(this);
1169 } 1205 }
1170 } 1206 }
1171 1207
1172 1208
1173 void DotPrinter::VisitAtom(AtomNode* that) { 1209 void DotPrinter::VisitText(TextNode* that) {
1174 stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n", 1210 stream()->Add(" n%p [label=\"", that);
1175 that, 1211 for (int i = 0; i < that->elements()->length(); i++) {
1176 that->data()); 1212 if (i > 0) stream()->Add(" ");
1213 TextElement elm = that->elements()->at(i);
1214 switch (elm.type) {
1215 case TextElement::ATOM: {
1216 stream()->Add("'%w'", elm.data.u_atom->data());
1217 break;
1218 }
1219 case TextElement::CHAR_CLASS: {
1220 RegExpCharacterClass* node = elm.data.u_char_class;
1221 stream()->Add("[");
1222 if (node->is_negated())
1223 stream()->Add("^");
1224 for (int j = 0; j < node->ranges()->length(); j++) {
1225 CharacterRange range = node->ranges()->at(j);
1226 stream()->Add("%k-%k", range.from(), range.to());
1227 }
1228 stream()->Add("]");
1229 break;
1230 }
1231 default:
1232 UNREACHABLE();
1233 }
1234 }
1235 stream()->Add("\", shape=box, peripheries=2];\n");
1177 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 1236 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
1178 Visit(that->on_success()); 1237 Visit(that->on_success());
1179 PrintOnFailure(that, that->on_failure()); 1238 PrintOnFailure(that, that->on_failure());
1180 } 1239 }
1181 1240
1182 1241
1183 void DotPrinter::VisitBackreference(BackreferenceNode* that) { 1242 void DotPrinter::VisitBackreference(BackreferenceNode* that) {
1184 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n", 1243 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
1185 that, 1244 that,
1186 that->start_register(), 1245 that->start_register(),
1187 that->end_register()); 1246 that->end_register());
1188 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 1247 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
1189 Visit(that->on_success()); 1248 Visit(that->on_success());
1190 PrintOnFailure(that, that->on_failure()); 1249 PrintOnFailure(that, that->on_failure());
1191 } 1250 }
1192 1251
1193 1252
1194 void DotPrinter::VisitEnd(EndNode* that) { 1253 void DotPrinter::VisitEnd(EndNode* that) {
1195 stream()->Add(" n%p [style=bold, shape=point];\n", that); 1254 stream()->Add(" n%p [style=bold, shape=point];\n", that);
1196 } 1255 }
1197 1256
1198 1257
1199 void DotPrinter::VisitCharacterClass(CharacterClassNode* that) {
1200 stream()->Add(" n%p [label=\"[", that);
1201 if (that->is_negated())
1202 stream()->Add("^");
1203 for (int i = 0; i < that->ranges()->length(); i++) {
1204 CharacterRange range = that->ranges()->at(i);
1205 stream()->Add("%k-%k", range.from(), range.to());
1206 }
1207 stream()->Add("]\"];\n");
1208 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
1209 Visit(that->on_success());
1210 PrintOnFailure(that, that->on_failure());
1211 }
1212
1213
1214 void DotPrinter::VisitAction(ActionNode* that) { 1258 void DotPrinter::VisitAction(ActionNode* that) {
1215 stream()->Add(" n%p [", that); 1259 stream()->Add(" n%p [", that);
1216 switch (that->type_) { 1260 switch (that->type_) {
1217 case ActionNode::STORE_REGISTER: 1261 case ActionNode::STORE_REGISTER:
1218 stream()->Add("label=\"$%i:=%i\", shape=box", 1262 stream()->Add("label=\"$%i:=%i\", shape=octagon",
1219 that->data_.u_store_register.reg, 1263 that->data_.u_store_register.reg,
1220 that->data_.u_store_register.value); 1264 that->data_.u_store_register.value);
1221 break; 1265 break;
1222 case ActionNode::INCREMENT_REGISTER: 1266 case ActionNode::INCREMENT_REGISTER:
1223 stream()->Add("label=\"$%i++\", shape=box", 1267 stream()->Add("label=\"$%i++\", shape=octagon",
1224 that->data_.u_increment_register.reg); 1268 that->data_.u_increment_register.reg);
1225 break; 1269 break;
1226 case ActionNode::STORE_POSITION: 1270 case ActionNode::STORE_POSITION:
1227 stream()->Add("label=\"$%i:=$pos\", shape=box", 1271 stream()->Add("label=\"$%i:=$pos\", shape=octagon",
1228 that->data_.u_position_register.reg); 1272 that->data_.u_position_register.reg);
1229 break; 1273 break;
1230 case ActionNode::RESTORE_POSITION: 1274 case ActionNode::RESTORE_POSITION:
1231 stream()->Add("label=\"$pos:=$%i\", shape=box", 1275 stream()->Add("label=\"$pos:=$%i\", shape=octagon",
1232 that->data_.u_position_register.reg); 1276 that->data_.u_position_register.reg);
1233 break; 1277 break;
1234 case ActionNode::BEGIN_SUBMATCH: 1278 case ActionNode::BEGIN_SUBMATCH:
1235 stream()->Add("label=\"begin\", shape=septagon"); 1279 stream()->Add("label=\"begin\", shape=septagon");
1236 break; 1280 break;
1237 case ActionNode::ESCAPE_SUBMATCH: 1281 case ActionNode::ESCAPE_SUBMATCH:
1238 stream()->Add("label=\"escape\", shape=septagon"); 1282 stream()->Add("label=\"escape\", shape=septagon");
1239 break; 1283 break;
1240 case ActionNode::END_SUBMATCH: 1284 case ActionNode::END_SUBMATCH:
1241 stream()->Add("label=\"end\", shape=septagon"); 1285 stream()->Add("label=\"end\", shape=septagon");
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
1293 #endif // DEBUG 1337 #endif // DEBUG
1294 1338
1295 1339
1296 // ------------------------------------------------------------------- 1340 // -------------------------------------------------------------------
1297 // Tree to graph conversion 1341 // Tree to graph conversion
1298 1342
1299 1343
1300 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 1344 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
1301 RegExpNode* on_success, 1345 RegExpNode* on_success,
1302 RegExpNode* on_failure) { 1346 RegExpNode* on_failure) {
1303 return new AtomNode(data(), on_success, on_failure); 1347 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
1348 elms->Add(TextElement::Atom(this));
1349 return new TextNode(elms, on_success, on_failure);
1350 }
1351
1352
1353 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
1354 RegExpNode* on_success,
1355 RegExpNode* on_failure) {
1356 return new TextNode(elements(), on_success, on_failure);
1304 } 1357 }
1305 1358
1306 1359
1307 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 1360 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
1308 RegExpNode* on_success, 1361 RegExpNode* on_success,
1309 RegExpNode* on_failure) { 1362 RegExpNode* on_failure) {
1310 return new CharacterClassNode(ranges(), 1363 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
1311 is_negated(), 1364 elms->Add(TextElement::CharClass(this));
1312 on_success, 1365 return new TextNode(elms, on_success, on_failure);
1313 on_failure);
1314 } 1366 }
1315 1367
1316 1368
1317 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 1369 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
1318 RegExpNode* on_success, 1370 RegExpNode* on_success,
1319 RegExpNode* on_failure) { 1371 RegExpNode* on_failure) {
1320 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 1372 ZoneList<RegExpTree*>* alternatives = this->alternatives();
1321 int length = alternatives->length(); 1373 int length = alternatives->length();
1322 ChoiceNode* result = new ChoiceNode(length, on_failure); 1374 ChoiceNode* result = new ChoiceNode(length, on_failure);
1323 for (int i = 0; i < length; i++) { 1375 for (int i = 0; i < length; i++) {
(...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after
1559 case 'W': 1611 case 'W':
1560 AddClassNegated(kWordRanges, kWordRangeCount, ranges); 1612 AddClassNegated(kWordRanges, kWordRangeCount, ranges);
1561 break; 1613 break;
1562 case 'd': 1614 case 'd':
1563 AddClass(kDigitRanges, kDigitRangeCount, ranges); 1615 AddClass(kDigitRanges, kDigitRangeCount, ranges);
1564 break; 1616 break;
1565 case 'D': 1617 case 'D':
1566 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); 1618 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
1567 break; 1619 break;
1568 case '.': 1620 case '.':
1569 ranges->Add(CharacterRange(0x0000, 0xFFFF)); 1621 ranges->Add(CharacterRange::Everything());
1570 break; 1622 break;
1571 default: 1623 default:
1572 UNREACHABLE(); 1624 UNREACHABLE();
1573 } 1625 }
1574 } 1626 }
1575 1627
1576 1628
1577 // ------------------------------------------------------------------- 1629 // -------------------------------------------------------------------
1578 // Splay tree 1630 // Splay tree
1579 1631
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
1735 that->info()->being_analyzed = false; 1787 that->info()->being_analyzed = false;
1736 that->info()->been_analyzed = true; 1788 that->info()->been_analyzed = true;
1737 } 1789 }
1738 1790
1739 1791
1740 void Analysis::VisitEnd(EndNode* that) { 1792 void Analysis::VisitEnd(EndNode* that) {
1741 // nothing to do 1793 // nothing to do
1742 } 1794 }
1743 1795
1744 1796
1745 void Analysis::VisitAtom(AtomNode* that) { 1797 void Analysis::VisitText(TextNode* that) {
1746 EnsureAnalyzed(that->on_success()); 1798 EnsureAnalyzed(that->on_success());
1747 EnsureAnalyzed(that->on_failure()); 1799 EnsureAnalyzed(that->on_failure());
1748 } 1800 }
1749 1801
1750 1802
1751 void Analysis::VisitAction(ActionNode* that) { 1803 void Analysis::VisitAction(ActionNode* that) {
1752 RegExpNode* next = that->on_success(); 1804 RegExpNode* next = that->on_success();
1753 EnsureAnalyzed(next); 1805 EnsureAnalyzed(next);
1754 that->info()->propagate_line = next->info()->propagate_line; 1806 that->info()->propagate_line = next->info()->propagate_line;
1755 that->info()->propagate_word = next->info()->propagate_word; 1807 that->info()->propagate_word = next->info()->propagate_word;
(...skipping 15 matching lines...) Expand all
1771 EnsureAnalyzed(that->on_failure()); 1823 EnsureAnalyzed(that->on_failure());
1772 } 1824 }
1773 1825
1774 1826
1775 void Analysis::VisitBackreference(BackreferenceNode* that) { 1827 void Analysis::VisitBackreference(BackreferenceNode* that) {
1776 EnsureAnalyzed(that->on_success()); 1828 EnsureAnalyzed(that->on_success());
1777 EnsureAnalyzed(that->on_failure()); 1829 EnsureAnalyzed(that->on_failure());
1778 } 1830 }
1779 1831
1780 1832
1781 void Analysis::VisitCharacterClass(CharacterClassNode* that) {
1782 EnsureAnalyzed(that->on_success());
1783 EnsureAnalyzed(that->on_failure());
1784 }
1785
1786
1787 // ------------------------------------------------------------------- 1833 // -------------------------------------------------------------------
1788 // Dispatch table construction 1834 // Dispatch table construction
1789 1835
1790 1836
1791 void DispatchTableConstructor::VisitEnd(EndNode* that) { 1837 void DispatchTableConstructor::VisitEnd(EndNode* that) {
1792 // nothing to do 1838 AddRange(CharacterRange::Everything());
1793 } 1839 }
1794 1840
1795 1841
1796 void DispatchTableConstructor::BuildTable(ChoiceNode* node) { 1842 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
1797 ASSERT(!node->table_calculated()); 1843 ASSERT(!node->table_calculated());
1798 node->set_being_calculated(true); 1844 node->set_being_calculated(true);
1799 ZoneList<GuardedAlternative>* alternatives = node->alternatives(); 1845 ZoneList<GuardedAlternative>* alternatives = node->alternatives();
1800 for (int i = 0; i < alternatives->length(); i++) { 1846 for (int i = 0; i < alternatives->length(); i++) {
1801 set_choice_index(i); 1847 set_choice_index(i);
1802 alternatives->at(i).node()->Accept(this); 1848 alternatives->at(i).node()->Accept(this);
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1859 return; 1905 return;
1860 } else { 1906 } else {
1861 last = range.to() + 1; 1907 last = range.to() + 1;
1862 } 1908 }
1863 } 1909 }
1864 } 1910 }
1865 AddRange(CharacterRange(last, 0xFFFF)); 1911 AddRange(CharacterRange(last, 0xFFFF));
1866 } 1912 }
1867 1913
1868 1914
1869 void DispatchTableConstructor::VisitCharacterClass(CharacterClassNode* that) { 1915 void DispatchTableConstructor::VisitText(TextNode* that) {
1870 ZoneList<CharacterRange>* ranges = that->ranges(); 1916 TextElement elm = that->elements()->at(0);
1871 if (that->is_negated()) { 1917 switch (elm.type) {
1872 AddInverse(ranges); 1918 case TextElement::ATOM: {
1873 } else { 1919 uc16 c = elm.data.u_atom->data()[0];
1874 for (int i = 0; i < ranges->length(); i++) { 1920 AddRange(CharacterRange(c, c));
1875 AddRange(ranges->at(i)); 1921 break;
1922 }
1923 case TextElement::CHAR_CLASS: {
1924 RegExpCharacterClass* tree = elm.data.u_char_class;
1925 ZoneList<CharacterRange>* ranges = tree->ranges();
1926 if (tree->is_negated()) {
1927 AddInverse(ranges);
1928 } else {
1929 for (int i = 0; i < ranges->length(); i++)
1930 AddRange(ranges->at(i));
1931 }
1932 break;
1933 }
1934 default: {
1935 UNIMPLEMENTED();
1876 } 1936 }
1877 } 1937 }
1878 } 1938 }
1879 1939
1880 1940
1881 void DispatchTableConstructor::VisitAtom(AtomNode* that) {
1882 uc16 c = that->data()[0];
1883 AddRange(CharacterRange(c, c));
1884 }
1885
1886
1887 void DispatchTableConstructor::VisitAction(ActionNode* that) { 1941 void DispatchTableConstructor::VisitAction(ActionNode* that) {
1888 that->on_success()->Accept(this); 1942 that->on_success()->Accept(this);
1889 } 1943 }
1890 1944
1891 1945
1892 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, 1946 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
1893 RegExpNode** node_return, 1947 RegExpNode** node_return,
1894 bool ignore_case) { 1948 bool ignore_case) {
1895 RegExpCompiler compiler(input->capture_count); 1949 RegExpCompiler compiler(input->capture_count);
1896 // Wrap the body of the regexp in capture #0. 1950 // Wrap the body of the regexp in capture #0.
(...skipping 27 matching lines...) Expand all
1924 } 1978 }
1925 1979
1926 RegExpMacroAssembler::RegExpMacroAssembler() { 1980 RegExpMacroAssembler::RegExpMacroAssembler() {
1927 } 1981 }
1928 1982
1929 RegExpMacroAssembler::~RegExpMacroAssembler() { 1983 RegExpMacroAssembler::~RegExpMacroAssembler() {
1930 } 1984 }
1931 1985
1932 1986
1933 }} // namespace v8::internal 1987 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/parser.cc » ('j') | src/parser.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698