Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(85)

Side by Side Diff: src/jsregexp.cc

Issue 11319: * Add support for positive lookahead assertions and negative... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/regexp2000/
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 994 matching lines...) Expand 10 before | Expand all | Expand 10 after
1005 } 1005 }
1006 1006
1007 1007
1008 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { 1008 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
1009 ActionNode* result = new ActionNode(STORE_POSITION, on_success); 1009 ActionNode* result = new ActionNode(STORE_POSITION, on_success);
1010 result->data_.u_position_register.reg = reg; 1010 result->data_.u_position_register.reg = reg;
1011 return result; 1011 return result;
1012 } 1012 }
1013 1013
1014 1014
1015 ActionNode* ActionNode::SavePosition(int reg, RegExpNode* on_success) {
1016 ActionNode* result = new ActionNode(SAVE_POSITION, on_success);
1017 result->data_.u_position_register.reg = reg;
1018 return result;
1019 }
1020
1021
1015 ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) { 1022 ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
1016 ActionNode* result = new ActionNode(RESTORE_POSITION, on_success); 1023 ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
1017 result->data_.u_position_register.reg = reg; 1024 result->data_.u_position_register.reg = reg;
1018 return result; 1025 return result;
1019 } 1026 }
1020 1027
1021 1028
1022 ActionNode* ActionNode::BeginSubmatch(RegExpNode* on_success) { 1029 ActionNode* ActionNode::BeginSubmatch(int reg, RegExpNode* on_success) {
1023 return new ActionNode(BEGIN_SUBMATCH, on_success); 1030 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
1031 result->data_.u_submatch_stack_pointer_register.reg = reg;
1032 return result;
1024 } 1033 }
1025 1034
1026 1035
1027 ActionNode* ActionNode::EscapeSubmatch(RegExpNode* on_success) { 1036 ActionNode* ActionNode::EscapeSubmatch(int reg, RegExpNode* on_success) {
1028 return new ActionNode(ESCAPE_SUBMATCH, on_success); 1037 ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);
1038 result->data_.u_submatch_stack_pointer_register.reg = reg;
1039 return result;
1029 } 1040 }
1030 1041
1031 1042
1032 ActionNode* ActionNode::EndSubmatch(RegExpNode* on_success) {
1033 return new ActionNode(END_SUBMATCH, on_success);
1034 }
1035
1036
1037 #define DEFINE_ACCEPT(Type) \ 1043 #define DEFINE_ACCEPT(Type) \
1038 void Type##Node::Accept(NodeVisitor* visitor) { \ 1044 void Type##Node::Accept(NodeVisitor* visitor) { \
1039 visitor->Visit##Type(this); \ 1045 visitor->Visit##Type(this); \
1040 } 1046 }
1041 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 1047 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
1042 #undef DEFINE_ACCEPT 1048 #undef DEFINE_ACCEPT
1043 1049
1044 1050
1045 // ------------------------------------------------------------------- 1051 // -------------------------------------------------------------------
1046 // Emit code. 1052 // Emit code.
(...skipping 24 matching lines...) Expand all
1071 case TextElement::ATOM: { 1077 case TextElement::ATOM: {
1072 Vector<const uc16> quarks = elm.data.u_atom->data(); 1078 Vector<const uc16> quarks = elm.data.u_atom->data();
1073 macro_assembler->CheckCharacters(quarks, 1079 macro_assembler->CheckCharacters(quarks,
1074 cp_offset, 1080 cp_offset,
1075 on_failure_->label()); 1081 on_failure_->label());
1076 cp_offset += quarks.length(); 1082 cp_offset += quarks.length();
1077 break; 1083 break;
1078 } 1084 }
1079 case TextElement::CHAR_CLASS: { 1085 case TextElement::CHAR_CLASS: {
1080 RegExpCharacterClass* cc = elm.data.u_char_class; 1086 RegExpCharacterClass* cc = elm.data.u_char_class;
1081 if (cc->is_negated()) return false;
1082 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure_->label()); 1087 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure_->label());
1083 cp_offset++; 1088 cp_offset++;
1084 1089
1085 ZoneList<CharacterRange>* ranges = cc->ranges(); 1090 ZoneList<CharacterRange>* ranges = cc->ranges();
1086 1091
1087 Label found; 1092 Label success;
1093
1094 Label *char_is_in_class =
1095 cc->is_negated() ? on_failure_->label() : &success;
1088 1096
1089 int range_count = ranges->length(); 1097 int range_count = ranges->length();
1090 1098
1091 if (range_count == 0) { 1099 if (range_count == 0) {
1092 on_failure()->GoTo(compiler); 1100 if (!cc->is_negated()) {
1101 on_failure()->GoTo(compiler);
1102 }
1093 break; 1103 break;
1094 } 1104 }
1095 1105
1096 for (int i = 0; i < range_count - 1; i++) { 1106 for (int i = 0; i < range_count - 1; i++) {
1097 CharacterRange& range = (*ranges)[i]; 1107 CharacterRange& range = (*ranges)[i];
1098 Label next_range; 1108 Label next_range;
1099 uc16 from = range.from(); 1109 uc16 from = range.from();
1100 uc16 to = range.to(); 1110 uc16 to = range.to();
1101 if (from != 0) { 1111 if (to == from) {
1102 macro_assembler->CheckCharacterLT(from, &next_range); 1112 macro_assembler->CheckCharacter(to, char_is_in_class);
1103 }
1104 if (to != 0xffff) {
1105 macro_assembler->CheckCharacterLT(to + 1, &found);
1106 } else { 1113 } else {
1107 macro_assembler->AdvanceCurrentPosition(1); 1114 if (from != 0) {
1108 on_success()->GoTo(compiler); 1115 macro_assembler->CheckCharacterLT(from, &next_range);
1116 }
1117 if (to != 0xffff) {
1118 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
1119 } else {
1120 macro_assembler->GoTo(char_is_in_class);
1121 }
1109 } 1122 }
1110 macro_assembler->Bind(&next_range); 1123 macro_assembler->Bind(&next_range);
1111 } 1124 }
1112 1125
1113 CharacterRange& range = (*ranges)[range_count - 1]; 1126 if (range_count != 0) {
1114 uc16 from = range.from(); 1127 CharacterRange& range = (*ranges)[range_count - 1];
1115 uc16 to = range.to(); 1128 uc16 from = range.from();
1116 if (from != 0) { 1129 uc16 to = range.to();
1117 macro_assembler->CheckCharacterLT(from, on_failure_->label()); 1130
1131 if (to == from) {
1132 if (cc->is_negated()) {
1133 macro_assembler->CheckCharacter(to, on_failure_->label());
1134 } else {
1135 macro_assembler->CheckNotCharacter(to, on_failure_->label());
1136 }
1137 } else {
1138 if (from != 0) {
1139 if (!cc->is_negated()) {
1140 macro_assembler->CheckCharacterLT(from, &on_failure_->label());
1141 } else {
1142 macro_assembler->CheckCharacterLT(from, &success);
1143 }
1144 }
1145 if (to != 0xffff) {
1146 if (!cc->is_negated()) {
1147 macro_assembler->CheckCharacterGT(to, on_failure_->label());
1148 } else {
1149 macro_assembler->CheckCharacterLT(to + 1, on_failure_->label());
1150 }
1151 } else {
1152 if (cc->is_negated()) {
1153 macro_assembler->GoTo(on_failure_->label());
1154 }
1155 }
1156 }
1157 } else if (cc->is_negated()) {
1158 macro_assembler->GoTo(on_failure_->label());
1118 } 1159 }
1119 if (to != 0xffff) { 1160
1120 macro_assembler->CheckCharacterGT(to, on_failure_->label()); 1161 macro_assembler->Bind(&success);
1121 } 1162
1122 compiler->AddWork(on_failure_);
1123 macro_assembler->Bind(&found);
1124 break; 1163 break;
1125 } 1164 }
1126 default: 1165 default:
1127 UNREACHABLE(); 1166 UNREACHABLE();
1128 return false; 1167 return false;
1129 } 1168 }
1130 } 1169 }
1170 compiler->AddWork(on_failure_);
1131 macro_assembler->AdvanceCurrentPosition(cp_offset); 1171 macro_assembler->AdvanceCurrentPosition(cp_offset);
1132 return on_success()->GoTo(compiler); 1172 return on_success()->GoTo(compiler);
1133 } 1173 }
1134 1174
1135 1175
1136 bool ChoiceNode::Emit(RegExpCompiler* compiler) { 1176 bool ChoiceNode::Emit(RegExpCompiler* compiler) {
1137 int choice_count = alternatives_->length(); 1177 int choice_count = alternatives_->length();
1138 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1178 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1139 Bind(macro_assembler); 1179 Bind(macro_assembler);
1140 // For now we just call all choices one after the other. The idea ultimately 1180 // For now we just call all choices one after the other. The idea ultimately
(...skipping 24 matching lines...) Expand all
1165 GuardedAlternative alternative = (*alternatives_)[i]; 1205 GuardedAlternative alternative = (*alternatives_)[i];
1166 ZoneList<Guard*>* guards = alternative.guards(); 1206 ZoneList<Guard*>* guards = alternative.guards();
1167 if (guards != NULL) { 1207 if (guards != NULL) {
1168 int guard_count = guards->length(); 1208 int guard_count = guards->length();
1169 for (int j = 0; j < guard_count; j++) { 1209 for (int j = 0; j < guard_count; j++) {
1170 GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label()); 1210 GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label());
1171 } 1211 }
1172 } 1212 }
1173 if (!on_failure_->IsBacktrack()) { 1213 if (!on_failure_->IsBacktrack()) {
1174 macro_assembler->PushBacktrack(on_failure_->label()); 1214 macro_assembler->PushBacktrack(on_failure_->label());
1215 compiler->AddWork(on_failure_);
1175 } 1216 }
1176 if (!alternative.node()->GoTo(compiler)) { 1217 if (!alternative.node()->GoTo(compiler)) {
1177 return false; 1218 return false;
1178 } 1219 }
1179 compiler->AddWork(on_failure_);
1180 return true; 1220 return true;
1181 } 1221 }
1182 1222
1183 1223
1184 bool ActionNode::Emit(RegExpCompiler* compiler) { 1224 bool ActionNode::Emit(RegExpCompiler* compiler) {
1185 RegExpMacroAssembler* macro = compiler->macro_assembler(); 1225 RegExpMacroAssembler* macro = compiler->macro_assembler();
1186 Bind(macro); 1226 Bind(macro);
1187 switch (type_) { 1227 switch (type_) {
1188 case STORE_REGISTER: 1228 case STORE_REGISTER:
1189 macro->SetRegister(data_.u_store_register.reg, 1229 macro->SetRegister(data_.u_store_register.reg,
(...skipping 21 matching lines...) Expand all
1211 bool ok = on_success()->GoTo(compiler); 1251 bool ok = on_success()->GoTo(compiler);
1212 if (!ok) { 1252 if (!ok) {
1213 undo.Unuse(); 1253 undo.Unuse();
1214 return false; 1254 return false;
1215 } 1255 }
1216 macro->Bind(&undo); 1256 macro->Bind(&undo);
1217 macro->PopRegister(data_.u_position_register.reg); 1257 macro->PopRegister(data_.u_position_register.reg);
1218 macro->Backtrack(); 1258 macro->Backtrack();
1219 break; 1259 break;
1220 } 1260 }
1261 case SAVE_POSITION:
1262 macro->WriteCurrentPositionToRegister(
1263 data_.u_position_register.reg);
1264 break;
1221 case RESTORE_POSITION: 1265 case RESTORE_POSITION:
1222 // TODO(erikcorry): Implement this. 1266 macro->SetCurrentPositionFromRegister(
1223 return false; 1267 data_.u_position_register.reg);
1268 break;
1224 case BEGIN_SUBMATCH: 1269 case BEGIN_SUBMATCH:
1225 // TODO(erikcorry): Implement this. 1270 macro->WriteStackPointerToRegister(
1226 return false; 1271 data_.u_submatch_stack_pointer_register.reg);
1272 break;
1227 case ESCAPE_SUBMATCH: 1273 case ESCAPE_SUBMATCH:
1228 // TODO(erikcorry): Implement this. 1274 macro->SetStackPointerFromRegister(
1229 return false; 1275 data_.u_submatch_stack_pointer_register.reg);
1230 case END_SUBMATCH: 1276 break;
1231 // TODO(erikcorry): Implement this.
1232 return false;
1233 default: 1277 default:
1234 UNREACHABLE(); 1278 UNREACHABLE();
1235 return false; 1279 return false;
1236 } 1280 }
1237 return on_success()->GoTo(compiler); 1281 return on_success()->GoTo(compiler);
1238 } 1282 }
1239 1283
1240 1284
1241 // ------------------------------------------------------------------- 1285 // -------------------------------------------------------------------
1242 // Dot/dotty output 1286 // Dot/dotty output
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
1426 that->data_.u_store_register.value); 1470 that->data_.u_store_register.value);
1427 break; 1471 break;
1428 case ActionNode::INCREMENT_REGISTER: 1472 case ActionNode::INCREMENT_REGISTER:
1429 stream()->Add("label=\"$%i++\", shape=octagon", 1473 stream()->Add("label=\"$%i++\", shape=octagon",
1430 that->data_.u_increment_register.reg); 1474 that->data_.u_increment_register.reg);
1431 break; 1475 break;
1432 case ActionNode::STORE_POSITION: 1476 case ActionNode::STORE_POSITION:
1433 stream()->Add("label=\"$%i:=$pos\", shape=octagon", 1477 stream()->Add("label=\"$%i:=$pos\", shape=octagon",
1434 that->data_.u_position_register.reg); 1478 that->data_.u_position_register.reg);
1435 break; 1479 break;
1480 case ActionNode::SAVE_POSITION:
1481 stream()->Add("label=\"$%i:=$pos\", shape=octagon",
1482 that->data_.u_position_register.reg);
1483 break;
1436 case ActionNode::RESTORE_POSITION: 1484 case ActionNode::RESTORE_POSITION:
1437 stream()->Add("label=\"$pos:=$%i\", shape=octagon", 1485 stream()->Add("label=\"$pos:=$%i\", shape=octagon",
1438 that->data_.u_position_register.reg); 1486 that->data_.u_position_register.reg);
1439 break; 1487 break;
1440 case ActionNode::BEGIN_SUBMATCH: 1488 case ActionNode::BEGIN_SUBMATCH:
1441 stream()->Add("label=\"begin\", shape=septagon"); 1489 stream()->Add("label=\"begin\", shape=septagon");
1442 break; 1490 break;
1443 case ActionNode::ESCAPE_SUBMATCH: 1491 case ActionNode::ESCAPE_SUBMATCH:
1444 stream()->Add("label=\"escape\", shape=septagon"); 1492 stream()->Add("label=\"escape\", shape=septagon");
1445 break; 1493 break;
1446 case ActionNode::END_SUBMATCH:
1447 stream()->Add("label=\"end\", shape=septagon");
1448 break;
1449 } 1494 }
1450 stream()->Add("];\n"); 1495 stream()->Add("];\n");
1451 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 1496 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
1452 Visit(that->on_success()); 1497 Visit(that->on_success());
1453 } 1498 }
1454 1499
1455 1500
1456 class DispatchTableDumper { 1501 class DispatchTableDumper {
1457 public: 1502 public:
1458 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { } 1503 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after
1646 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 1691 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
1647 RegExpNode* on_success, 1692 RegExpNode* on_success,
1648 RegExpNode* on_failure) { 1693 RegExpNode* on_failure) {
1649 return on_success; 1694 return on_success;
1650 } 1695 }
1651 1696
1652 1697
1653 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 1698 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
1654 RegExpNode* on_success, 1699 RegExpNode* on_success,
1655 RegExpNode* on_failure) { 1700 RegExpNode* on_failure) {
1701 int stack_pointer_register = compiler->AllocateRegister();
Lasse Reichstein 2008/11/20 10:32:11 You never free a register again. Would it be poss
Erik Corry 2008/11/20 11:37:34 We could free them if we checked for nested expres
1702 int position_register = compiler->AllocateRegister();
1656 if (is_positive()) { 1703 if (is_positive()) {
1657 int position_register = compiler->AllocateRegister();
1658 // begin submatch scope 1704 // begin submatch scope
1659 // $reg = $pos 1705 // $reg = $pos
1660 // if [body] 1706 // if [body]
1661 // then 1707 // then
1662 // $pos = $reg 1708 // $pos = $reg
1663 // escape submatch scope (drop all backtracks created in scope) 1709 // escape submatch scope (drop all backtracks created in scope)
1664 // succeed 1710 // succeed
1665 // else 1711 // else
1666 // end submatch scope (nothing to clean up, just exit the scope) 1712 // end submatch scope (nothing to clean up, just exit the scope)
1667 // fail 1713 // fail
1668 return ActionNode::BeginSubmatch(ActionNode::StorePosition( 1714 return ActionNode::BeginSubmatch(
1669 position_register, body()->ToNode(compiler, 1715 stack_pointer_register,
1670 ActionNode::RestorePosition(position_register, 1716 ActionNode::SavePosition(
1671 ActionNode::EscapeSubmatch(on_success)), 1717 position_register,
1672 ActionNode::EndSubmatch(on_failure)))); 1718 body()->ToNode(
1719 compiler,
1720 ActionNode::RestorePosition(
1721 position_register,
1722 ActionNode::EscapeSubmatch(stack_pointer_register,
1723 on_success)),
1724 on_failure)));
1673 } else { 1725 } else {
1674 // begin submatch scope 1726 // begin submatch scope
1675 // try 1727 // try
1676 // first if (body) 1728 // first if (body)
1677 // then 1729 // then
1678 // escape submatch scope 1730 // escape submatch scope
1679 // fail 1731 // fail
1680 // else 1732 // else
1681 // backtrack 1733 // backtrack
1682 // second 1734 // second
1683 // end submatch scope 1735 // end submatch scope
1736 // restore current position
1684 // succeed 1737 // succeed
1685 ChoiceNode* try_node = 1738 ChoiceNode* try_node =
1686 new ChoiceNode(1, ActionNode::EndSubmatch(on_success)); 1739 new ChoiceNode(1, ActionNode::RestorePosition(position_register,
1687 RegExpNode* body_node = body()->ToNode(compiler, 1740 on_success));
1688 ActionNode::EscapeSubmatch(on_failure), 1741 RegExpNode* body_node = body()->ToNode(
1742 compiler,
1743 ActionNode::EscapeSubmatch(stack_pointer_register, on_failure),
1689 compiler->backtrack()); 1744 compiler->backtrack());
1690 GuardedAlternative body_alt(body_node); 1745 GuardedAlternative body_alt(body_node);
1691 try_node->AddAlternative(body_alt); 1746 try_node->AddAlternative(body_alt);
1692 return ActionNode::BeginSubmatch(try_node); 1747 return ActionNode::BeginSubmatch(stack_pointer_register,
1748 ActionNode::SavePosition(
1749 position_register,
1750 try_node));
1693 } 1751 }
1694 } 1752 }
1695 1753
1696 1754
1697 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 1755 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
1698 RegExpNode* on_success, 1756 RegExpNode* on_success,
1699 RegExpNode* on_failure) { 1757 RegExpNode* on_failure) {
1700 return ToNode(body(), index(), compiler, on_success, on_failure); 1758 return ToNode(body(), index(), compiler, on_success, on_failure);
1701 } 1759 }
1702 1760
(...skipping 539 matching lines...) Expand 10 before | Expand all | Expand 10 after
2242 } 2300 }
2243 2301
2244 RegExpMacroAssembler::RegExpMacroAssembler() { 2302 RegExpMacroAssembler::RegExpMacroAssembler() {
2245 } 2303 }
2246 2304
2247 RegExpMacroAssembler::~RegExpMacroAssembler() { 2305 RegExpMacroAssembler::~RegExpMacroAssembler() {
2248 } 2306 }
2249 2307
2250 2308
2251 }} // namespace v8::internal 2309 }} // namespace v8::internal
OLDNEW
« src/assembler-re2k.cc ('K') | « src/jsregexp.h ('k') | src/regexp-macro-assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698