Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(288)

Side by Side Diff: src/jsregexp.cc

Issue 11352: * Match literals in a case independent way.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/regexp2000/
Patch Set: '' Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/interpreter-re2k.cc ('k') | src/regexp-macro-assembler.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 857 matching lines...) Expand 10 before | Expand all | Expand 10 after
868 TextElement TextElement::CharClass( 868 TextElement TextElement::CharClass(
869 RegExpCharacterClass* char_class) { 869 RegExpCharacterClass* char_class) {
870 TextElement result = TextElement(CHAR_CLASS); 870 TextElement result = TextElement(CHAR_CLASS);
871 result.data.u_char_class = char_class; 871 result.data.u_char_class = char_class;
872 return result; 872 return result;
873 } 873 }
874 874
875 875
876 class RegExpCompiler { 876 class RegExpCompiler {
877 public: 877 public:
878 explicit RegExpCompiler(int capture_count); 878 RegExpCompiler(int capture_count, bool ignore_case);
879 879
880 int AllocateRegister() { return next_register_++; } 880 int AllocateRegister() { return next_register_++; }
881 881
882 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 882 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
883 RegExpNode* start, 883 RegExpNode* start,
884 int capture_count, 884 int capture_count);
885 bool case_independent);
886 885
887 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 886 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
888 887
889 static const int kImplementationOffset = 0; 888 static const int kImplementationOffset = 0;
890 static const int kNumberOfRegistersOffset = 0; 889 static const int kNumberOfRegistersOffset = 0;
891 static const int kCodeOffset = 1; 890 static const int kCodeOffset = 1;
892 891
893 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 892 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
894 EndNode* accept() { return accept_; } 893 EndNode* accept() { return accept_; }
895 EndNode* backtrack() { return backtrack_; } 894 EndNode* backtrack() { return backtrack_; }
896 895
897 static const int kMaxRecursion = 100; 896 static const int kMaxRecursion = 100;
898 inline int recursion_depth() { return recursion_depth_; } 897 inline int recursion_depth() { return recursion_depth_; }
899 inline void IncrementRecursionDepth() { recursion_depth_++; } 898 inline void IncrementRecursionDepth() { recursion_depth_++; }
900 inline void DecrementRecursionDepth() { recursion_depth_--; } 899 inline void DecrementRecursionDepth() { recursion_depth_--; }
901 900
901 inline bool is_case_independent() { return is_case_independent_; }
902
902 private: 903 private:
903 EndNode* accept_; 904 EndNode* accept_;
904 EndNode* backtrack_; 905 EndNode* backtrack_;
905 int next_register_; 906 int next_register_;
906 List<RegExpNode*>* work_list_; 907 List<RegExpNode*>* work_list_;
907 int recursion_depth_; 908 int recursion_depth_;
908 RegExpMacroAssembler* macro_assembler_; 909 RegExpMacroAssembler* macro_assembler_;
910 bool is_case_independent_;
909 }; 911 };
910 912
911 913
912 // Attempts to compile the regexp using a Regexp2000 code generator. Returns 914 // Attempts to compile the regexp using a Regexp2000 code generator. Returns
913 // a fixed array or a null handle depending on whether it succeeded. 915 // a fixed array or a null handle depending on whether it succeeded.
914 RegExpCompiler::RegExpCompiler(int capture_count) 916 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
915 : next_register_(2 * (capture_count + 1)), 917 : next_register_(2 * (capture_count + 1)),
916 work_list_(NULL), 918 work_list_(NULL),
917 recursion_depth_(0) { 919 recursion_depth_(0),
920 is_case_independent_(ignore_case) {
918 accept_ = new EndNode(EndNode::ACCEPT); 921 accept_ = new EndNode(EndNode::ACCEPT);
919 backtrack_ = new EndNode(EndNode::BACKTRACK); 922 backtrack_ = new EndNode(EndNode::BACKTRACK);
920 } 923 }
921 924
922 925
923 Handle<FixedArray> RegExpCompiler::Assemble( 926 Handle<FixedArray> RegExpCompiler::Assemble(
924 RegExpMacroAssembler* macro_assembler, 927 RegExpMacroAssembler* macro_assembler,
925 RegExpNode* start, 928 RegExpNode* start,
926 int capture_count, 929 int capture_count) {
927 bool case_independent) { 930 if (!FLAG_attempt_case_independent && is_case_independent_) {
928 if (case_independent) return Handle<FixedArray>::null(); 931 return Handle<FixedArray>::null();
932 }
929 macro_assembler_ = macro_assembler; 933 macro_assembler_ = macro_assembler;
930 List <RegExpNode*> work_list(0); 934 List <RegExpNode*> work_list(0);
931 work_list_ = &work_list; 935 work_list_ = &work_list;
932 Label fail; 936 Label fail;
933 macro_assembler->PushBacktrack(&fail); 937 macro_assembler->PushBacktrack(&fail);
934 if (!start->GoTo(this)) { 938 if (!start->GoTo(this)) {
935 fail.Unuse(); 939 fail.Unuse();
936 return Handle<FixedArray>::null(); 940 return Handle<FixedArray>::null();
937 } 941 }
938 while (!work_list.is_empty()) { 942 while (!work_list.is_empty()) {
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after
1103 case Guard::LT: 1107 case Guard::LT:
1104 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure); 1108 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure);
1105 break; 1109 break;
1106 case Guard::GEQ: 1110 case Guard::GEQ:
1107 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure); 1111 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure);
1108 break; 1112 break;
1109 } 1113 }
1110 } 1114 }
1111 1115
1112 1116
1117 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
1118 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
1119
1120
1121 static inline void EmitAtomNonLetters(
1122 RegExpMacroAssembler* macro_assembler,
1123 TextElement elm,
1124 Vector<const uc16> quarks,
1125 Label* on_failure,
1126 int cp_offset) {
1127 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1128 for (int i = quarks.length() - 1; i >= 0; i--) {
1129 uc16 c = quarks[i];
1130 int length = uncanonicalize.get(c, '\0', chars);
1131 if (length <= 1) {
1132 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
1133 macro_assembler->CheckNotCharacter(c, on_failure);
1134 }
1135 }
1136 }
1137
1138
1139 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1140 uc16 c1,
1141 uc16 c2,
1142 Label* on_failure) {
1143 uc16 exor = c1 ^ c2;
1144 // Check whether exor has only one bit set.
1145 if (((exor - 1) & exor) == 0) {
1146 // If c1 and c2 differ only by one bit.
1147 // Ecma262UnCanonicalize always gives the highest number last.
1148 ASSERT(c2 > c1);
1149 macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
1150 return true;
1151 } else {
1152 ASSERT(c2 > c1);
1153 uc16 diff = c2 - c1;
1154 if (((diff - 1) & diff) == 0 && c1 >= diff) {
1155 // If the characters differ by 2^n but don't differ by one bit then
1156 // subtract the difference from the found character, then do the or
1157 // trick. We avoid the theoretical case where negative numbers are
1158 // involved in order to simplify code generation.
1159 macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
1160 diff,
1161 on_failure);
1162 return true;
1163 }
1164 }
1165 return false;
1166 }
1167
1168
1169 static inline void EmitAtomLetters(
1170 RegExpMacroAssembler* macro_assembler,
1171 TextElement elm,
1172 Vector<const uc16> quarks,
1173 Label* on_failure,
1174 int cp_offset) {
1175 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1176 for (int i = quarks.length() - 1; i >= 0; i--) {
1177 uc16 c = quarks[i];
1178 int length = uncanonicalize.get(c, '\0', chars);
1179 if (length <= 1) continue;
1180 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
1181 Label ok;
1182 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1183 switch (length) {
1184 case 2: {
1185 if (ShortCutEmitCharacterPair(macro_assembler,
1186 chars[0],
1187 chars[1],
1188 on_failure)) {
1189 ok.Unuse();
1190 } else {
1191 macro_assembler->CheckCharacter(chars[0], &ok);
1192 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1193 macro_assembler->Bind(&ok);
1194 }
1195 break;
1196 }
1197 case 4:
1198 macro_assembler->CheckCharacter(chars[3], &ok);
1199 // Fall through!
1200 case 3:
1201 macro_assembler->CheckCharacter(chars[0], &ok);
1202 macro_assembler->CheckCharacter(chars[1], &ok);
1203 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1204 macro_assembler->Bind(&ok);
1205 break;
1206 default:
1207 UNREACHABLE();
1208 break;
1209 }
1210 }
1211 }
1212
1213
1214 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1215 RegExpCharacterClass* cc,
1216 int cp_offset,
1217 Label* on_failure) {
1218 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
1219 cp_offset++;
1220
1221 ZoneList<CharacterRange>* ranges = cc->ranges();
1222
1223 Label success;
1224
1225 Label *char_is_in_class =
1226 cc->is_negated() ? on_failure : &success;
1227
1228 int range_count = ranges->length();
1229
1230 if (range_count == 0) {
1231 if (!cc->is_negated()) {
1232 macro_assembler->GoTo(on_failure);
1233 }
1234 return;
1235 }
1236
1237 for (int i = 0; i < range_count - 1; i++) {
1238 CharacterRange& range = ranges->at(i);
1239 Label next_range;
1240 uc16 from = range.from();
1241 uc16 to = range.to();
1242 if (to == from) {
1243 macro_assembler->CheckCharacter(to, char_is_in_class);
1244 } else {
1245 if (from != 0) {
1246 macro_assembler->CheckCharacterLT(from, &next_range);
1247 }
1248 if (to != 0xffff) {
1249 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
1250 } else {
1251 macro_assembler->GoTo(char_is_in_class);
1252 }
1253 }
1254 macro_assembler->Bind(&next_range);
1255 }
1256
1257 CharacterRange& range = ranges->at(range_count - 1);
1258 uc16 from = range.from();
1259 uc16 to = range.to();
1260
1261 if (to == from) {
1262 if (cc->is_negated()) {
1263 macro_assembler->CheckCharacter(to, on_failure);
1264 } else {
1265 macro_assembler->CheckNotCharacter(to, on_failure);
1266 }
1267 } else {
1268 if (from != 0) {
1269 if (!cc->is_negated()) {
1270 macro_assembler->CheckCharacterLT(from, on_failure);
1271 } else {
1272 macro_assembler->CheckCharacterLT(from, &success);
1273 }
1274 }
1275 if (to != 0xffff) {
1276 if (!cc->is_negated()) {
1277 macro_assembler->CheckCharacterGT(to, on_failure);
1278 } else {
1279 macro_assembler->CheckCharacterLT(to + 1, on_failure);
1280 }
1281 } else {
1282 if (cc->is_negated()) {
1283 macro_assembler->GoTo(on_failure);
1284 }
1285 }
1286 }
1287 macro_assembler->Bind(&success);
1288 }
1289
1290
1291
1113 bool TextNode::Emit(RegExpCompiler* compiler) { 1292 bool TextNode::Emit(RegExpCompiler* compiler) {
1114 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1293 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1115 Bind(macro_assembler); 1294 Bind(macro_assembler);
1116 int element_count = elms_->length(); 1295 int element_count = elms_->length();
1117 int cp_offset = 0; 1296 int cp_offset = 0;
1297 // First, handle straight character matches.
1118 for (int i = 0; i < element_count; i++) { 1298 for (int i = 0; i < element_count; i++) {
1119 TextElement elm = (*elms_)[i]; 1299 TextElement elm = elms_->at(i);
1120 switch (elm.type) { 1300 if (elm.type == TextElement::ATOM) {
1121 case TextElement::ATOM: { 1301 Vector<const uc16> quarks = elm.data.u_atom->data();
1122 Vector<const uc16> quarks = elm.data.u_atom->data(); 1302 if (!compiler->is_case_independent()) {
1123 macro_assembler->CheckCharacters(quarks, 1303 macro_assembler->CheckCharacters(quarks,
1124 cp_offset, 1304 cp_offset,
1125 on_failure_->label()); 1305 on_failure_->label());
1306 } else {
1307 EmitAtomNonLetters(macro_assembler, elm, quarks, on_failure_->label(), cp_offset);
1308 }
1309 cp_offset += quarks.length();
1310 } else {
1311 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
1312 cp_offset++;
1313 }
1314 }
1315 // Second, handle case independent letter matches if any.
1316 if (compiler->is_case_independent()) {
1317 cp_offset = 0;
1318 for (int i = 0; i < element_count; i++) {
1319 TextElement elm = elms_->at(i);
1320 if (elm.type == TextElement::ATOM) {
1321 Vector<const uc16> quarks = elm.data.u_atom->data();
1322 EmitAtomLetters(macro_assembler, elm, quarks, on_failure_->label(), cp_offset);
1126 cp_offset += quarks.length(); 1323 cp_offset += quarks.length();
1127 break; 1324 } else {
1128 }
1129 case TextElement::CHAR_CLASS: {
1130 RegExpCharacterClass* cc = elm.data.u_char_class;
1131 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure_->label());
1132 cp_offset++; 1325 cp_offset++;
1133 1326 }
1134 ZoneList<CharacterRange>* ranges = cc->ranges(); 1327 }
1135 1328 }
1136 Label success; 1329 // If the fast character matches passed then do the character classes.
1137 1330 cp_offset = 0;
1138 Label *char_is_in_class = 1331 for (int i = 0; i < element_count; i++) {
1139 cc->is_negated() ? on_failure_->label() : &success; 1332 TextElement elm = elms_->at(i);
1140 1333 if (elm.type == TextElement::CHAR_CLASS) {
1141 int range_count = ranges->length(); 1334 RegExpCharacterClass* cc = elm.data.u_char_class;
1142 1335 EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label());
1143 if (range_count == 0) { 1336 cp_offset ++;
1144 if (!cc->is_negated()) { 1337 } else {
1145 on_failure()->GoTo(compiler); 1338 cp_offset += elm.data.u_atom->data().length();
1146 } 1339 }
1147 break; 1340 }
1148 } 1341
1149
1150 for (int i = 0; i < range_count - 1; i++) {
1151 CharacterRange& range = (*ranges)[i];
1152 Label next_range;
1153 uc16 from = range.from();
1154 uc16 to = range.to();
1155 if (to == from) {
1156 macro_assembler->CheckCharacter(to, char_is_in_class);
1157 } else {
1158 if (from != 0) {
1159 macro_assembler->CheckCharacterLT(from, &next_range);
1160 }
1161 if (to != 0xffff) {
1162 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
1163 } else {
1164 macro_assembler->GoTo(char_is_in_class);
1165 }
1166 }
1167 macro_assembler->Bind(&next_range);
1168 }
1169
1170 if (range_count != 0) {
1171 CharacterRange& range = (*ranges)[range_count - 1];
1172 uc16 from = range.from();
1173 uc16 to = range.to();
1174
1175 if (to == from) {
1176 if (cc->is_negated()) {
1177 macro_assembler->CheckCharacter(to, on_failure_->label());
1178 } else {
1179 macro_assembler->CheckNotCharacter(to, on_failure_->label());
1180 }
1181 } else {
1182 if (from != 0) {
1183 if (!cc->is_negated()) {
1184 macro_assembler->CheckCharacterLT(from, on_failure_->label());
1185 } else {
1186 macro_assembler->CheckCharacterLT(from, &success);
1187 }
1188 }
1189 if (to != 0xffff) {
1190 if (!cc->is_negated()) {
1191 macro_assembler->CheckCharacterGT(to, on_failure_->label());
1192 } else {
1193 macro_assembler->CheckCharacterLT(to + 1, on_failure_->label());
1194 }
1195 } else {
1196 if (cc->is_negated()) {
1197 macro_assembler->GoTo(on_failure_->label());
1198 }
1199 }
1200 }
1201 } else if (cc->is_negated()) {
1202 macro_assembler->GoTo(on_failure_->label());
1203 }
1204
1205 macro_assembler->Bind(&success);
1206
1207 break;
1208 }
1209 default:
1210 UNREACHABLE();
1211 return false;
1212 }
1213 }
1214 compiler->AddWork(on_failure_); 1342 compiler->AddWork(on_failure_);
1215 macro_assembler->AdvanceCurrentPosition(cp_offset); 1343 macro_assembler->AdvanceCurrentPosition(cp_offset);
1216 return on_success()->GoTo(compiler); 1344 return on_success()->GoTo(compiler);
1217 } 1345 }
1218 1346
1219 1347
1220 bool ChoiceNode::Emit(RegExpCompiler* compiler) { 1348 bool ChoiceNode::Emit(RegExpCompiler* compiler) {
1221 int choice_count = alternatives_->length(); 1349 int choice_count = alternatives_->length();
1222 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1350 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1223 Bind(macro_assembler); 1351 Bind(macro_assembler);
1224 // For now we just call all choices one after the other. The idea ultimately 1352 // For now we just call all choices one after the other. The idea ultimately
1225 // is to use the Dispatch table to try only the relevant ones. 1353 // is to use the Dispatch table to try only the relevant ones.
1226 int i; 1354 int i;
1227 for (i = 0; i < choice_count - 1; i++) { 1355 for (i = 0; i < choice_count - 1; i++) {
1228 GuardedAlternative alternative = (*alternatives_)[i]; 1356 GuardedAlternative alternative = alternatives_->at(i);
1229 Label after; 1357 Label after;
1230 Label after_no_pop_cp; 1358 Label after_no_pop_cp;
1231 ZoneList<Guard*>* guards = alternative.guards(); 1359 ZoneList<Guard*>* guards = alternative.guards();
1232 if (guards != NULL) { 1360 if (guards != NULL) {
1233 int guard_count = guards->length(); 1361 int guard_count = guards->length();
1234 for (int j = 0; j < guard_count; j++) { 1362 for (int j = 0; j < guard_count; j++) {
1235 GenerateGuard(macro_assembler, (*guards)[j], &after_no_pop_cp); 1363 GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp);
1236 } 1364 }
1237 } 1365 }
1238 macro_assembler->PushCurrentPosition(); 1366 macro_assembler->PushCurrentPosition();
1239 macro_assembler->PushBacktrack(&after); 1367 macro_assembler->PushBacktrack(&after);
1240 if (!alternative.node()->GoTo(compiler)) { 1368 if (!alternative.node()->GoTo(compiler)) {
1241 after.Unuse(); 1369 after.Unuse();
1242 after_no_pop_cp.Unuse(); 1370 after_no_pop_cp.Unuse();
1243 return false; 1371 return false;
1244 } 1372 }
1245 macro_assembler->Bind(&after); 1373 macro_assembler->Bind(&after);
1246 macro_assembler->PopCurrentPosition(); 1374 macro_assembler->PopCurrentPosition();
1247 macro_assembler->Bind(&after_no_pop_cp); 1375 macro_assembler->Bind(&after_no_pop_cp);
1248 } 1376 }
1249 GuardedAlternative alternative = (*alternatives_)[i]; 1377 GuardedAlternative alternative = alternatives_->at(i);
1250 ZoneList<Guard*>* guards = alternative.guards(); 1378 ZoneList<Guard*>* guards = alternative.guards();
1251 if (guards != NULL) { 1379 if (guards != NULL) {
1252 int guard_count = guards->length(); 1380 int guard_count = guards->length();
1253 for (int j = 0; j < guard_count; j++) { 1381 for (int j = 0; j < guard_count; j++) {
1254 GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label()); 1382 GenerateGuard(macro_assembler, guards->at(j), on_failure_->label());
1255 } 1383 }
1256 } 1384 }
1257 if (!on_failure_->IsBacktrack()) { 1385 if (!on_failure_->IsBacktrack()) {
1258 ASSERT_NOT_NULL(on_failure_ -> label()); 1386 ASSERT_NOT_NULL(on_failure_ -> label());
1259 macro_assembler->PushBacktrack(on_failure_->label()); 1387 macro_assembler->PushBacktrack(on_failure_->label());
1260 compiler->AddWork(on_failure_); 1388 compiler->AddWork(on_failure_);
1261 } 1389 }
1262 if (!alternative.node()->GoTo(compiler)) { 1390 if (!alternative.node()->GoTo(compiler)) {
1263 return false; 1391 return false;
1264 } 1392 }
(...skipping 660 matching lines...) Expand 10 before | Expand all | Expand 10 after
1925 // character. 2053 // character.
1926 case '*': 2054 case '*':
1927 ranges->Add(CharacterRange::Everything()); 2055 ranges->Add(CharacterRange::Everything());
1928 break; 2056 break;
1929 default: 2057 default:
1930 UNREACHABLE(); 2058 UNREACHABLE();
1931 } 2059 }
1932 } 2060 }
1933 2061
1934 2062
1935 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
1936 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
1937
1938
1939 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) { 2063 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
1940 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 2064 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1941 if (IsSingleton()) { 2065 if (IsSingleton()) {
1942 // If this is a singleton we just expand the one character. 2066 // If this is a singleton we just expand the one character.
1943 int length = uncanonicalize.get(from(), '\0', chars); 2067 int length = uncanonicalize.get(from(), '\0', chars);
1944 for (int i = 0; i < length; i++) { 2068 for (int i = 0; i < length; i++) {
1945 uc32 chr = chars[i]; 2069 uc32 chr = chars[i];
1946 if (chr != from()) { 2070 if (chr != from()) {
1947 ranges->Add(CharacterRange::Singleton(chars[i])); 2071 ranges->Add(CharacterRange::Singleton(chars[i]));
1948 } 2072 }
(...skipping 456 matching lines...) Expand 10 before | Expand all | Expand 10 after
2405 2529
2406 2530
2407 void DispatchTableConstructor::VisitAction(ActionNode* that) { 2531 void DispatchTableConstructor::VisitAction(ActionNode* that) {
2408 that->on_success()->Accept(this); 2532 that->on_success()->Accept(this);
2409 } 2533 }
2410 2534
2411 2535
2412 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, 2536 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
2413 RegExpNode** node_return, 2537 RegExpNode** node_return,
2414 bool ignore_case) { 2538 bool ignore_case) {
2415 RegExpCompiler compiler(input->capture_count); 2539 RegExpCompiler compiler(input->capture_count, ignore_case);
2416 // Wrap the body of the regexp in capture #0. 2540 // Wrap the body of the regexp in capture #0.
2417 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, 2541 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
2418 0, 2542 0,
2419 &compiler, 2543 &compiler,
2420 compiler.accept(), 2544 compiler.accept(),
2421 compiler.backtrack()); 2545 compiler.backtrack());
2422 // Add a .*? at the beginning, outside the body capture. 2546 // Add a .*? at the beginning, outside the body capture.
2423 // Note: We could choose to not add this if the regexp is anchored at 2547 // Note: We could choose to not add this if the regexp is anchored at
2424 // the start of the input but I'm not sure how best to do that and 2548 // the start of the input but I'm not sure how best to do that and
2425 // since we don't even handle ^ yet I'm saving that optimization for 2549 // since we don't even handle ^ yet I'm saving that optimization for
(...skipping 12 matching lines...) Expand all
2438 #if !(defined ARM || defined __arm__ || defined __thumb__) 2562 #if !(defined ARM || defined __arm__ || defined __thumb__)
2439 if (FLAG_re2k_native) { // Flag only checked in IA32 mode. 2563 if (FLAG_re2k_native) { // Flag only checked in IA32 mode.
2440 // TODO(lrn) Move compilation to a later point in the life-cycle 2564 // TODO(lrn) Move compilation to a later point in the life-cycle
2441 // of the RegExp. We don't know the type of input string yet. 2565 // of the RegExp. We don't know the type of input string yet.
2442 // For now, always assume two-byte strings. 2566 // For now, always assume two-byte strings.
2443 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, 2567 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,
2444 (input->capture_count + 1) * 2, 2568 (input->capture_count + 1) * 2,
2445 ignore_case); 2569 ignore_case);
2446 return compiler.Assemble(&macro_assembler, 2570 return compiler.Assemble(&macro_assembler,
2447 node, 2571 node,
2448 input->capture_count, 2572 input->capture_count);
2449 ignore_case);
2450 } 2573 }
2451 #endif 2574 #endif
2452 byte codes[1024]; 2575 byte codes[1024];
2453 Re2kAssembler assembler(Vector<byte>(codes, 1024)); 2576 Re2kAssembler assembler(Vector<byte>(codes, 1024));
2454 RegExpMacroAssemblerRe2k macro_assembler(&assembler); 2577 RegExpMacroAssemblerRe2k macro_assembler(&assembler);
2455 return compiler.Assemble(&macro_assembler, 2578 return compiler.Assemble(&macro_assembler,
2456 node, 2579 node,
2457 input->capture_count, 2580 input->capture_count);
2458 ignore_case);
2459 } 2581 }
2460 2582
2461 2583
2462 }} // namespace v8::internal 2584 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/interpreter-re2k.cc ('k') | src/regexp-macro-assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698