Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 1162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1173 ASSERT(args.length() == 1); | 1173 ASSERT(args.length() == 1); |
| 1174 uint32_t code; | 1174 uint32_t code; |
| 1175 if (Array::IndexFromObject(args[0], &code)) { | 1175 if (Array::IndexFromObject(args[0], &code)) { |
| 1176 if (code <= 0xffff) { | 1176 if (code <= 0xffff) { |
| 1177 return Heap::LookupSingleCharacterStringFromCode(code); | 1177 return Heap::LookupSingleCharacterStringFromCode(code); |
| 1178 } | 1178 } |
| 1179 } | 1179 } |
| 1180 return Heap::empty_string(); | 1180 return Heap::empty_string(); |
| 1181 } | 1181 } |
| 1182 | 1182 |
| 1183 class ReplacementStringBuilder { | |
|
Erik Corry
2009/03/12 10:13:00
I think you should use StringBuilderConcatHelper i
Lasse Reichstein
2009/03/13 08:36:51
Done.
| |
| 1184 public: | |
| 1185 ReplacementStringBuilder() : string_() {} | |
| 1186 | |
| 1187 void Add(Handle<String> string) { | |
| 1188 StringShape shape(*string); | |
| 1189 if (string->length(shape) > 0) { | |
| 1190 if (string_.is_null()) { | |
| 1191 string_ = string; | |
| 1192 } else { | |
| 1193 string_ = | |
| 1194 Factory::NewConsString(string_, StringShape(*string_), | |
| 1195 string, shape); | |
| 1196 } | |
| 1197 } | |
| 1198 } | |
| 1199 | |
| 1200 Handle<String> ToString() { | |
| 1201 if (string_.is_null()) { | |
| 1202 return Factory::empty_string(); | |
| 1203 } | |
| 1204 return string_; | |
| 1205 } | |
| 1206 private: | |
| 1207 Handle<String> string_; | |
| 1208 }; | |
| 1209 | |
| 1210 | |
| 1211 class CompiledReplacement { | |
| 1212 public: | |
| 1213 CompiledReplacement(Handle<String> replacement, int capture_count) | |
| 1214 : parts_(1), replacement_substrings_(0) { | |
| 1215 ParseReplacement(replacement, capture_count); | |
|
Erik Corry
2009/03/12 10:13:00
Nontrivial work shouldn't be in constructors.
Lasse Reichstein
2009/03/13 08:36:51
Done.
| |
| 1216 } | |
| 1217 void Apply(ReplacementStringBuilder* builder, | |
| 1218 int match_from, | |
| 1219 int match_to, | |
| 1220 Handle<String> subject, | |
| 1221 Handle<JSArray> last_match_info) { | |
| 1222 for (int i = 0, n = parts_.length(); i < n; i++) { | |
| 1223 ReplacementPart part = parts_[i]; | |
| 1224 switch (part.tag) { | |
| 1225 case SUBJECT_MATCH: | |
| 1226 builder->Add(Factory::NewStringSlice(subject, match_from, match_to)); | |
| 1227 break; | |
| 1228 case SUBJECT_PREFIX: | |
| 1229 builder->Add(Factory::NewStringSlice(subject, 0, match_from)); | |
| 1230 break; | |
| 1231 case SUBJECT_SUFFIX: { | |
| 1232 int length = subject->length(); | |
| 1233 builder->Add(Factory::NewStringSlice(subject, match_to, length)); | |
| 1234 break; | |
| 1235 } | |
| 1236 case SUBJECT_CAPTURE: { | |
| 1237 int capture = part.data; | |
| 1238 FixedArray* match_info = last_match_info->elements(); | |
|
Erik Corry
2009/03/12 10:13:00
Move outside loop? Actually, you should move this
Lasse Reichstein
2009/03/13 08:36:51
As commented later, it can't be moved that far out
| |
| 1239 int from = RegExpImpl::GetCapture(match_info, capture * 2); | |
| 1240 int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1); | |
| 1241 builder->Add(Factory::NewStringSlice(subject, from, to)); | |
| 1242 break; | |
| 1243 } | |
| 1244 case REPLACEMENT_SUBSTRING: | |
| 1245 builder->Add(replacement_substrings_[part.data]); | |
| 1246 break; | |
| 1247 default: | |
| 1248 UNREACHABLE(); | |
| 1249 } | |
| 1250 } | |
| 1251 } | |
| 1252 private: | |
| 1253 enum PartType { | |
| 1254 SUBJECT_MATCH = -1, | |
|
Erik Corry
2009/03/12 10:13:00
Subject match seems like a special case of subject
Lasse Reichstein
2009/03/13 08:36:51
True. Slightly faster to implement since I already
| |
| 1255 SUBJECT_PREFIX = -2, | |
| 1256 SUBJECT_SUFFIX = -3, | |
| 1257 SUBJECT_CAPTURE = -4, | |
| 1258 REPLACEMENT_SUBSTRING = -5 | |
|
Erik Corry
2009/03/12 10:13:00
I'd prefer these to be positive and the offsets ne
Lasse Reichstein
2009/03/13 08:36:51
Done.
| |
| 1259 }; | |
| 1260 | |
| 1261 struct ReplacementPart { | |
| 1262 // If tag >= 0 then it is the start index of a substring of the replacement | |
| 1263 // pattern. | |
| 1264 ReplacementPart(int tag, int data) | |
| 1265 : tag(tag), data(data) {} | |
| 1266 int tag; | |
| 1267 int data; | |
|
Erik Corry
2009/03/12 10:13:00
'data' is too generic a name, there's no comment t
Lasse Reichstein
2009/03/13 08:36:51
Comments added.
| |
| 1268 }; | |
| 1269 | |
| 1270 template<typename Char> | |
| 1271 static void Parse(ZoneList<ReplacementPart>* parts, | |
| 1272 Vector<Char> characters, | |
| 1273 int capture_count) { | |
| 1274 int length = characters.length(); | |
| 1275 int last = 0; | |
| 1276 for (int i = 0; i < length; i++) { | |
| 1277 Char c = characters[i]; | |
| 1278 if (c == '$') { | |
| 1279 int next_index = i + 1; | |
| 1280 if (next_index == length) { // No next character! | |
| 1281 break; | |
| 1282 } | |
| 1283 Char c2 = characters[next_index]; | |
| 1284 switch (c2) { | |
| 1285 case '$': | |
| 1286 if (i > last) { | |
| 1287 // Up to and including the first "$". | |
| 1288 parts->Add(ReplacementPart(last, next_index)); | |
| 1289 last = next_index + 1; // Continue after the second "$". | |
| 1290 } else { | |
| 1291 last = next_index; // Continue with the second "$". | |
| 1292 } | |
| 1293 i = next_index; | |
| 1294 break; | |
| 1295 case '`': | |
| 1296 if (i > last) { | |
| 1297 parts->Add(ReplacementPart(last, i)); | |
|
Erik Corry
2009/03/12 10:13:00
Perhaps parts->Add could check for last == i to sa
Lasse Reichstein
2009/03/13 08:36:51
Parts is just a ZoneList, and this is a self-conta
| |
| 1298 } | |
| 1299 parts->Add(ReplacementPart(SUBJECT_PREFIX, 0)); | |
| 1300 i = next_index; | |
| 1301 last = i + 1; | |
| 1302 break; | |
| 1303 case '\'': | |
| 1304 if (i > last) { | |
| 1305 parts->Add(ReplacementPart(last, i)); | |
| 1306 } | |
| 1307 parts->Add(ReplacementPart(SUBJECT_SUFFIX, 0)); | |
| 1308 i = next_index; | |
| 1309 last = i + 1; | |
| 1310 break; | |
| 1311 case '&': | |
| 1312 if (i > last) { | |
| 1313 parts->Add(ReplacementPart(last, i)); | |
| 1314 } | |
| 1315 parts->Add(ReplacementPart(SUBJECT_MATCH, 0)); | |
| 1316 i = next_index; | |
| 1317 last = i + 1; | |
| 1318 break; | |
| 1319 case '0': | |
|
Erik Corry
2009/03/12 10:13:00
Are $0 and $04 allowed? Should $0 be taken verbat
Lasse Reichstein
2009/03/13 08:36:51
Yes and yes.
| |
| 1320 case '1': | |
| 1321 case '2': | |
| 1322 case '3': | |
| 1323 case '4': | |
| 1324 case '5': | |
| 1325 case '6': | |
| 1326 case '7': | |
| 1327 case '8': | |
| 1328 case '9': { | |
| 1329 int capture_ref = c2 - '0'; | |
| 1330 if (capture_ref > capture_count) { | |
| 1331 i = next_index; | |
| 1332 continue; | |
| 1333 } | |
| 1334 int second_digit_index = next_index + 1; | |
| 1335 if (second_digit_index < length) { | |
| 1336 // Peek ahead to see if we have two digits. | |
| 1337 Char c3 = characters[second_digit_index]; | |
| 1338 if ('0' <= c3 && c3 <= '9') { // Double digits. | |
| 1339 int double_digit_ref = capture_ref * 10 + c3 - '0'; | |
| 1340 if (double_digit_ref <= capture_count) { | |
| 1341 next_index = second_digit_index; | |
| 1342 capture_ref = double_digit_ref; | |
| 1343 } | |
| 1344 } | |
| 1345 } | |
| 1346 if (capture_ref > 0) { | |
| 1347 if (i > last) { | |
| 1348 parts->Add(ReplacementPart(last, i)); | |
| 1349 } | |
| 1350 parts->Add(ReplacementPart(SUBJECT_CAPTURE, capture_ref)); | |
| 1351 last = next_index + 1; | |
| 1352 } | |
| 1353 i = next_index; | |
| 1354 break; | |
| 1355 } | |
| 1356 default: | |
|
Erik Corry
2009/03/12 10:13:00
Should $F00 replace with $FOO or FOO?
Lasse Reichstein
2009/03/13 08:36:51
With "$F00".
| |
| 1357 i = next_index; | |
| 1358 break; | |
| 1359 } | |
| 1360 } | |
| 1361 } | |
| 1362 if (length > last) { | |
| 1363 parts->Add(ReplacementPart(last, length)); | |
| 1364 } | |
| 1365 } | |
| 1366 | |
| 1367 void ParseReplacement(Handle<String> replacement, int capture_count) { | |
| 1368 StringShape shape(*replacement); | |
| 1369 ASSERT(replacement->IsFlat(shape)); | |
| 1370 if (shape.IsAsciiRepresentation()) { | |
| 1371 Parse(&parts_, replacement->ToAsciiVector(), capture_count); | |
| 1372 } else { | |
| 1373 ASSERT(shape.IsTwoByteRepresentation()); | |
| 1374 Parse(&parts_, replacement->ToUC16Vector(), capture_count); | |
| 1375 } | |
| 1376 // Find substrings of replacement string and create them as String objcts.. | |
| 1377 int index = 0; | |
| 1378 for (int i = 0, n = parts_.length(); i < n; i++) { | |
| 1379 int tag = parts_[i].tag; | |
| 1380 if (tag >= 0) { // A replacement string slice. | |
| 1381 int end = parts_[i].data; | |
| 1382 replacement_substrings_.Add(Factory::NewStringSlice(replacement, | |
| 1383 tag, | |
| 1384 end)); | |
| 1385 parts_[i].tag = REPLACEMENT_SUBSTRING; | |
| 1386 parts_[i].data = index; | |
| 1387 index++; | |
| 1388 } | |
| 1389 } | |
| 1390 } | |
| 1391 | |
| 1392 ZoneList<ReplacementPart> parts_; | |
| 1393 ZoneList<Handle<String> > replacement_substrings_; | |
| 1394 }; | |
| 1395 | |
| 1396 | |
| 1397 static Object* StringReplaceRegExpWithString(String* subject, | |
| 1398 JSRegExp* regexp, | |
| 1399 String* replacement, | |
| 1400 JSArray* last_match_info) { | |
| 1401 ASSERT(subject->IsFlat(StringShape(subject))); | |
| 1402 ASSERT(replacement->IsFlat(StringShape(replacement))); | |
| 1403 | |
| 1404 HandleScope handles; | |
| 1405 | |
| 1406 int length = subject->length(); | |
| 1407 Handle<String> subject_handle(subject); | |
| 1408 Handle<JSRegExp> regexp_handle(regexp); | |
| 1409 Handle<String> replacement_handle(replacement); | |
| 1410 Handle<JSArray> last_match_info_handle(last_match_info); | |
| 1411 Handle<Object> match = RegExpImpl::Exec(regexp_handle, | |
| 1412 subject_handle, | |
| 1413 0, | |
| 1414 last_match_info_handle); | |
| 1415 if (match.is_null()) { | |
| 1416 return Failure::Exception(); | |
| 1417 } | |
| 1418 if (match->IsNull()) { | |
| 1419 return *subject_handle; | |
| 1420 } | |
| 1421 | |
| 1422 int capture_count = regexp_handle->CaptureCount(); | |
| 1423 | |
| 1424 // CompiledReplacement uses zone allocation. | |
| 1425 ZoneScope zone(DELETE_ON_EXIT); | |
| 1426 CompiledReplacement compiled_replacement(replacement_handle, capture_count); | |
| 1427 ReplacementStringBuilder builder; | |
| 1428 bool is_global = regexp_handle->GetFlags().is_global(); | |
| 1429 | |
| 1430 // Index of end of last match. | |
| 1431 int prev = 0; | |
| 1432 | |
| 1433 do { | |
| 1434 ASSERT(last_match_info_handle->HasFastElements()); | |
| 1435 FixedArray* match_data = last_match_info_handle->elements(); | |
|
Erik Corry
2009/03/12 10:13:00
Not using a handle is a little dodgy. If you move
Lasse Reichstein
2009/03/13 08:36:51
Won't work. The loop might be preempted (since it
Erik Corry
2009/03/13 09:12:34
If you retain a reference to the old FixedArray th
| |
| 1436 ASSERT_EQ(capture_count * 2 + 2, | |
| 1437 Smi::cast( | |
| 1438 match_data->get(RegExpImpl::kLastCaptureCount))->value()); | |
| 1439 | |
| 1440 int start = RegExpImpl::GetCapture(match_data, 0); | |
| 1441 int end = RegExpImpl::GetCapture(match_data, 1); | |
| 1442 | |
| 1443 if (prev < start) { | |
| 1444 builder.Add(Factory::NewStringSlice(subject_handle, prev, start)); | |
| 1445 } | |
| 1446 compiled_replacement.Apply(&builder, | |
| 1447 start, | |
| 1448 end, | |
| 1449 subject_handle, | |
| 1450 last_match_info_handle); | |
| 1451 prev = end; | |
| 1452 | |
| 1453 // Only continue checking for global regexps. | |
| 1454 if (!is_global) break; | |
| 1455 | |
| 1456 // Continue from where the match ended, unless it was an empty match. | |
| 1457 int next = end; | |
| 1458 if (start == end) { | |
| 1459 next = end + 1; | |
| 1460 if (next > length) break; | |
| 1461 } | |
| 1462 | |
| 1463 match = RegExpImpl::Exec(regexp_handle, | |
| 1464 subject_handle, | |
| 1465 next, | |
| 1466 last_match_info_handle); | |
| 1467 if (match.is_null()) { | |
| 1468 return Failure::Exception(); | |
| 1469 } | |
| 1470 } while (!match->IsNull()); | |
| 1471 | |
| 1472 if (prev < length) { | |
| 1473 builder.Add(Factory::NewStringSlice(subject_handle, prev, length)); | |
| 1474 } | |
| 1475 | |
| 1476 return *(builder.ToString()); | |
| 1477 } | |
| 1478 | |
| 1479 | |
| 1480 static Object* Runtime_StringReplaceRegExpWithString(Arguments args) { | |
| 1481 ASSERT(args.length() == 4); | |
| 1482 | |
| 1483 CONVERT_CHECKED(String, subject, args[0]); | |
| 1484 StringShape subject_shape(subject); | |
| 1485 if (!subject->IsFlat(subject_shape)) { | |
| 1486 Object* flat_subject = subject->TryFlatten(subject_shape); | |
| 1487 if (flat_subject->IsFailure()) { | |
| 1488 return flat_subject; | |
| 1489 } | |
| 1490 subject = String::cast(flat_subject); | |
| 1491 } | |
| 1492 | |
| 1493 CONVERT_CHECKED(String, replacement, args[2]); | |
| 1494 StringShape replacement_shape(replacement); | |
| 1495 if (!replacement->IsFlat(replacement_shape)) { | |
| 1496 Object* flat_replacement = replacement->TryFlatten(replacement_shape); | |
| 1497 if (flat_replacement->IsFailure()) { | |
| 1498 return flat_replacement; | |
| 1499 } | |
| 1500 replacement = String::cast(flat_replacement); | |
| 1501 } | |
| 1502 | |
| 1503 CONVERT_CHECKED(JSRegExp, regexp, args[1]); | |
| 1504 CONVERT_CHECKED(JSArray, last_match_info, args[3]); | |
|
Erik Corry
2009/03/12 10:13:00
You need to check here that the last_match_info ha
Lasse Reichstein
2009/03/13 08:36:51
I only ever use last_match_info after having execu
| |
| 1505 | |
| 1506 return StringReplaceRegExpWithString(subject, | |
| 1507 regexp, | |
| 1508 replacement, | |
| 1509 last_match_info); | |
| 1510 } | |
| 1511 | |
| 1512 | |
| 1183 | 1513 |
| 1184 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a | 1514 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a |
| 1185 // limit, we can fix the size of tables. | 1515 // limit, we can fix the size of tables. |
| 1186 static const int kBMMaxShift = 0xff; | 1516 static const int kBMMaxShift = 0xff; |
| 1187 // Reduce alphabet to this size. | 1517 // Reduce alphabet to this size. |
| 1188 static const int kBMAlphabetSize = 0x100; | 1518 static const int kBMAlphabetSize = 0x100; |
| 1189 // For patterns below this length, the skip length of Boyer-Moore is too short | 1519 // For patterns below this length, the skip length of Boyer-Moore is too short |
| 1190 // to compensate for the algorithmic overhead compared to simple brute force. | 1520 // to compensate for the algorithmic overhead compared to simple brute force. |
| 1191 static const int kBMMinPatternLength = 5; | 1521 static const int kBMMinPatternLength = 5; |
| 1192 | 1522 |
| (...skipping 5004 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6197 } else { | 6527 } else { |
| 6198 // Handle last resort GC and make sure to allow future allocations | 6528 // Handle last resort GC and make sure to allow future allocations |
| 6199 // to grow the heap without causing GCs (if possible). | 6529 // to grow the heap without causing GCs (if possible). |
| 6200 Counters::gc_last_resort_from_js.Increment(); | 6530 Counters::gc_last_resort_from_js.Increment(); |
| 6201 Heap::CollectAllGarbage(); | 6531 Heap::CollectAllGarbage(); |
| 6202 } | 6532 } |
| 6203 } | 6533 } |
| 6204 | 6534 |
| 6205 | 6535 |
| 6206 } } // namespace v8::internal | 6536 } } // namespace v8::internal |
| OLD | NEW |