OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "src/v8.h" |
| 6 |
| 7 #include "src/arguments.h" |
| 8 #include "src/jsregexp-inl.h" |
| 9 #include "src/jsregexp.h" |
| 10 #include "src/runtime/runtime.h" |
| 11 #include "src/runtime/runtime-utils.h" |
| 12 #include "src/runtime/string-builder.h" |
| 13 #include "src/string-search.h" |
| 14 |
| 15 namespace v8 { |
| 16 namespace internal { |
| 17 |
| 18 class CompiledReplacement { |
| 19 public: |
| 20 explicit CompiledReplacement(Zone* zone) |
| 21 : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} |
| 22 |
| 23 // Return whether the replacement is simple. |
| 24 bool Compile(Handle<String> replacement, int capture_count, |
| 25 int subject_length); |
| 26 |
| 27 // Use Apply only if Compile returned false. |
| 28 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, |
| 29 int32_t* match); |
| 30 |
| 31 // Number of distinct parts of the replacement pattern. |
| 32 int parts() { return parts_.length(); } |
| 33 |
| 34 Zone* zone() const { return zone_; } |
| 35 |
| 36 private: |
| 37 enum PartType { |
| 38 SUBJECT_PREFIX = 1, |
| 39 SUBJECT_SUFFIX, |
| 40 SUBJECT_CAPTURE, |
| 41 REPLACEMENT_SUBSTRING, |
| 42 REPLACEMENT_STRING, |
| 43 NUMBER_OF_PART_TYPES |
| 44 }; |
| 45 |
| 46 struct ReplacementPart { |
| 47 static inline ReplacementPart SubjectMatch() { |
| 48 return ReplacementPart(SUBJECT_CAPTURE, 0); |
| 49 } |
| 50 static inline ReplacementPart SubjectCapture(int capture_index) { |
| 51 return ReplacementPart(SUBJECT_CAPTURE, capture_index); |
| 52 } |
| 53 static inline ReplacementPart SubjectPrefix() { |
| 54 return ReplacementPart(SUBJECT_PREFIX, 0); |
| 55 } |
| 56 static inline ReplacementPart SubjectSuffix(int subject_length) { |
| 57 return ReplacementPart(SUBJECT_SUFFIX, subject_length); |
| 58 } |
| 59 static inline ReplacementPart ReplacementString() { |
| 60 return ReplacementPart(REPLACEMENT_STRING, 0); |
| 61 } |
| 62 static inline ReplacementPart ReplacementSubString(int from, int to) { |
| 63 DCHECK(from >= 0); |
| 64 DCHECK(to > from); |
| 65 return ReplacementPart(-from, to); |
| 66 } |
| 67 |
| 68 // If tag <= 0 then it is the negation of a start index of a substring of |
| 69 // the replacement pattern, otherwise it's a value from PartType. |
| 70 ReplacementPart(int tag, int data) : tag(tag), data(data) { |
| 71 // Must be non-positive or a PartType value. |
| 72 DCHECK(tag < NUMBER_OF_PART_TYPES); |
| 73 } |
| 74 // Either a value of PartType or a non-positive number that is |
| 75 // the negation of an index into the replacement string. |
| 76 int tag; |
| 77 // The data value's interpretation depends on the value of tag: |
| 78 // tag == SUBJECT_PREFIX || |
| 79 // tag == SUBJECT_SUFFIX: data is unused. |
| 80 // tag == SUBJECT_CAPTURE: data is the number of the capture. |
| 81 // tag == REPLACEMENT_SUBSTRING || |
| 82 // tag == REPLACEMENT_STRING: data is index into array of substrings |
| 83 // of the replacement string. |
| 84 // tag <= 0: Temporary representation of the substring of the replacement |
| 85 // string ranging over -tag .. data. |
| 86 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the |
| 87 // substring objects. |
| 88 int data; |
| 89 }; |
| 90 |
| 91 template <typename Char> |
| 92 bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts, |
| 93 Vector<Char> characters, int capture_count, |
| 94 int subject_length, Zone* zone) { |
| 95 int length = characters.length(); |
| 96 int last = 0; |
| 97 for (int i = 0; i < length; i++) { |
| 98 Char c = characters[i]; |
| 99 if (c == '$') { |
| 100 int next_index = i + 1; |
| 101 if (next_index == length) { // No next character! |
| 102 break; |
| 103 } |
| 104 Char c2 = characters[next_index]; |
| 105 switch (c2) { |
| 106 case '$': |
| 107 if (i > last) { |
| 108 // There is a substring before. Include the first "$". |
| 109 parts->Add( |
| 110 ReplacementPart::ReplacementSubString(last, next_index), |
| 111 zone); |
| 112 last = next_index + 1; // Continue after the second "$". |
| 113 } else { |
| 114 // Let the next substring start with the second "$". |
| 115 last = next_index; |
| 116 } |
| 117 i = next_index; |
| 118 break; |
| 119 case '`': |
| 120 if (i > last) { |
| 121 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 122 } |
| 123 parts->Add(ReplacementPart::SubjectPrefix(), zone); |
| 124 i = next_index; |
| 125 last = i + 1; |
| 126 break; |
| 127 case '\'': |
| 128 if (i > last) { |
| 129 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 130 } |
| 131 parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone); |
| 132 i = next_index; |
| 133 last = i + 1; |
| 134 break; |
| 135 case '&': |
| 136 if (i > last) { |
| 137 parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); |
| 138 } |
| 139 parts->Add(ReplacementPart::SubjectMatch(), zone); |
| 140 i = next_index; |
| 141 last = i + 1; |
| 142 break; |
| 143 case '0': |
| 144 case '1': |
| 145 case '2': |
| 146 case '3': |
| 147 case '4': |
| 148 case '5': |
| 149 case '6': |
| 150 case '7': |
| 151 case '8': |
| 152 case '9': { |
| 153 int capture_ref = c2 - '0'; |
| 154 if (capture_ref > capture_count) { |
| 155 i = next_index; |
| 156 continue; |
| 157 } |
| 158 int second_digit_index = next_index + 1; |
| 159 if (second_digit_index < length) { |
| 160 // Peek ahead to see if we have two digits. |
| 161 Char c3 = characters[second_digit_index]; |
| 162 if ('0' <= c3 && c3 <= '9') { // Double digits. |
| 163 int double_digit_ref = capture_ref * 10 + c3 - '0'; |
| 164 if (double_digit_ref <= capture_count) { |
| 165 next_index = second_digit_index; |
| 166 capture_ref = double_digit_ref; |
| 167 } |
| 168 } |
| 169 } |
| 170 if (capture_ref > 0) { |
| 171 if (i > last) { |
| 172 parts->Add(ReplacementPart::ReplacementSubString(last, i), |
| 173 zone); |
| 174 } |
| 175 DCHECK(capture_ref <= capture_count); |
| 176 parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone); |
| 177 last = next_index + 1; |
| 178 } |
| 179 i = next_index; |
| 180 break; |
| 181 } |
| 182 default: |
| 183 i = next_index; |
| 184 break; |
| 185 } |
| 186 } |
| 187 } |
| 188 if (length > last) { |
| 189 if (last == 0) { |
| 190 // Replacement is simple. Do not use Apply to do the replacement. |
| 191 return true; |
| 192 } else { |
| 193 parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); |
| 194 } |
| 195 } |
| 196 return false; |
| 197 } |
| 198 |
| 199 ZoneList<ReplacementPart> parts_; |
| 200 ZoneList<Handle<String> > replacement_substrings_; |
| 201 Zone* zone_; |
| 202 }; |
| 203 |
| 204 |
| 205 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count, |
| 206 int subject_length) { |
| 207 { |
| 208 DisallowHeapAllocation no_gc; |
| 209 String::FlatContent content = replacement->GetFlatContent(); |
| 210 DCHECK(content.IsFlat()); |
| 211 bool simple = false; |
| 212 if (content.IsOneByte()) { |
| 213 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), |
| 214 capture_count, subject_length, zone()); |
| 215 } else { |
| 216 DCHECK(content.IsTwoByte()); |
| 217 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), |
| 218 capture_count, subject_length, zone()); |
| 219 } |
| 220 if (simple) return true; |
| 221 } |
| 222 |
| 223 Isolate* isolate = replacement->GetIsolate(); |
| 224 // Find substrings of replacement string and create them as String objects. |
| 225 int substring_index = 0; |
| 226 for (int i = 0, n = parts_.length(); i < n; i++) { |
| 227 int tag = parts_[i].tag; |
| 228 if (tag <= 0) { // A replacement string slice. |
| 229 int from = -tag; |
| 230 int to = parts_[i].data; |
| 231 replacement_substrings_.Add( |
| 232 isolate->factory()->NewSubString(replacement, from, to), zone()); |
| 233 parts_[i].tag = REPLACEMENT_SUBSTRING; |
| 234 parts_[i].data = substring_index; |
| 235 substring_index++; |
| 236 } else if (tag == REPLACEMENT_STRING) { |
| 237 replacement_substrings_.Add(replacement, zone()); |
| 238 parts_[i].data = substring_index; |
| 239 substring_index++; |
| 240 } |
| 241 } |
| 242 return false; |
| 243 } |
| 244 |
| 245 |
| 246 void CompiledReplacement::Apply(ReplacementStringBuilder* builder, |
| 247 int match_from, int match_to, int32_t* match) { |
| 248 DCHECK_LT(0, parts_.length()); |
| 249 for (int i = 0, n = parts_.length(); i < n; i++) { |
| 250 ReplacementPart part = parts_[i]; |
| 251 switch (part.tag) { |
| 252 case SUBJECT_PREFIX: |
| 253 if (match_from > 0) builder->AddSubjectSlice(0, match_from); |
| 254 break; |
| 255 case SUBJECT_SUFFIX: { |
| 256 int subject_length = part.data; |
| 257 if (match_to < subject_length) { |
| 258 builder->AddSubjectSlice(match_to, subject_length); |
| 259 } |
| 260 break; |
| 261 } |
| 262 case SUBJECT_CAPTURE: { |
| 263 int capture = part.data; |
| 264 int from = match[capture * 2]; |
| 265 int to = match[capture * 2 + 1]; |
| 266 if (from >= 0 && to > from) { |
| 267 builder->AddSubjectSlice(from, to); |
| 268 } |
| 269 break; |
| 270 } |
| 271 case REPLACEMENT_SUBSTRING: |
| 272 case REPLACEMENT_STRING: |
| 273 builder->AddString(replacement_substrings_[part.data]); |
| 274 break; |
| 275 default: |
| 276 UNREACHABLE(); |
| 277 } |
| 278 } |
| 279 } |
| 280 |
| 281 |
| 282 void FindOneByteStringIndices(Vector<const uint8_t> subject, char pattern, |
| 283 ZoneList<int>* indices, unsigned int limit, |
| 284 Zone* zone) { |
| 285 DCHECK(limit > 0); |
| 286 // Collect indices of pattern in subject using memchr. |
| 287 // Stop after finding at most limit values. |
| 288 const uint8_t* subject_start = subject.start(); |
| 289 const uint8_t* subject_end = subject_start + subject.length(); |
| 290 const uint8_t* pos = subject_start; |
| 291 while (limit > 0) { |
| 292 pos = reinterpret_cast<const uint8_t*>( |
| 293 memchr(pos, pattern, subject_end - pos)); |
| 294 if (pos == NULL) return; |
| 295 indices->Add(static_cast<int>(pos - subject_start), zone); |
| 296 pos++; |
| 297 limit--; |
| 298 } |
| 299 } |
| 300 |
| 301 |
| 302 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, |
| 303 ZoneList<int>* indices, unsigned int limit, |
| 304 Zone* zone) { |
| 305 DCHECK(limit > 0); |
| 306 const uc16* subject_start = subject.start(); |
| 307 const uc16* subject_end = subject_start + subject.length(); |
| 308 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { |
| 309 if (*pos == pattern) { |
| 310 indices->Add(static_cast<int>(pos - subject_start), zone); |
| 311 limit--; |
| 312 } |
| 313 } |
| 314 } |
| 315 |
| 316 |
| 317 template <typename SubjectChar, typename PatternChar> |
| 318 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, |
| 319 Vector<const PatternChar> pattern, |
| 320 ZoneList<int>* indices, unsigned int limit, Zone* zone) { |
| 321 DCHECK(limit > 0); |
| 322 // Collect indices of pattern in subject. |
| 323 // Stop after finding at most limit values. |
| 324 int pattern_length = pattern.length(); |
| 325 int index = 0; |
| 326 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
| 327 while (limit > 0) { |
| 328 index = search.Search(subject, index); |
| 329 if (index < 0) return; |
| 330 indices->Add(index, zone); |
| 331 index += pattern_length; |
| 332 limit--; |
| 333 } |
| 334 } |
| 335 |
| 336 |
| 337 void FindStringIndicesDispatch(Isolate* isolate, String* subject, |
| 338 String* pattern, ZoneList<int>* indices, |
| 339 unsigned int limit, Zone* zone) { |
| 340 { |
| 341 DisallowHeapAllocation no_gc; |
| 342 String::FlatContent subject_content = subject->GetFlatContent(); |
| 343 String::FlatContent pattern_content = pattern->GetFlatContent(); |
| 344 DCHECK(subject_content.IsFlat()); |
| 345 DCHECK(pattern_content.IsFlat()); |
| 346 if (subject_content.IsOneByte()) { |
| 347 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); |
| 348 if (pattern_content.IsOneByte()) { |
| 349 Vector<const uint8_t> pattern_vector = |
| 350 pattern_content.ToOneByteVector(); |
| 351 if (pattern_vector.length() == 1) { |
| 352 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 353 limit, zone); |
| 354 } else { |
| 355 FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 356 limit, zone); |
| 357 } |
| 358 } else { |
| 359 FindStringIndices(isolate, subject_vector, |
| 360 pattern_content.ToUC16Vector(), indices, limit, zone); |
| 361 } |
| 362 } else { |
| 363 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
| 364 if (pattern_content.IsOneByte()) { |
| 365 Vector<const uint8_t> pattern_vector = |
| 366 pattern_content.ToOneByteVector(); |
| 367 if (pattern_vector.length() == 1) { |
| 368 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 369 limit, zone); |
| 370 } else { |
| 371 FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 372 limit, zone); |
| 373 } |
| 374 } else { |
| 375 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); |
| 376 if (pattern_vector.length() == 1) { |
| 377 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
| 378 limit, zone); |
| 379 } else { |
| 380 FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
| 381 limit, zone); |
| 382 } |
| 383 } |
| 384 } |
| 385 } |
| 386 } |
| 387 |
| 388 |
| 389 template <typename ResultSeqString> |
| 390 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( |
| 391 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, |
| 392 Handle<String> replacement, Handle<JSArray> last_match_info) { |
| 393 DCHECK(subject->IsFlat()); |
| 394 DCHECK(replacement->IsFlat()); |
| 395 |
| 396 ZoneScope zone_scope(isolate->runtime_zone()); |
| 397 ZoneList<int> indices(8, zone_scope.zone()); |
| 398 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); |
| 399 String* pattern = |
| 400 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
| 401 int subject_len = subject->length(); |
| 402 int pattern_len = pattern->length(); |
| 403 int replacement_len = replacement->length(); |
| 404 |
| 405 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, |
| 406 zone_scope.zone()); |
| 407 |
| 408 int matches = indices.length(); |
| 409 if (matches == 0) return *subject; |
| 410 |
| 411 // Detect integer overflow. |
| 412 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - |
| 413 static_cast<int64_t>(pattern_len)) * |
| 414 static_cast<int64_t>(matches) + |
| 415 static_cast<int64_t>(subject_len); |
| 416 int result_len; |
| 417 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { |
| 418 STATIC_ASSERT(String::kMaxLength < kMaxInt); |
| 419 result_len = kMaxInt; // Provoke exception. |
| 420 } else { |
| 421 result_len = static_cast<int>(result_len_64); |
| 422 } |
| 423 |
| 424 int subject_pos = 0; |
| 425 int result_pos = 0; |
| 426 |
| 427 MaybeHandle<SeqString> maybe_res; |
| 428 if (ResultSeqString::kHasOneByteEncoding) { |
| 429 maybe_res = isolate->factory()->NewRawOneByteString(result_len); |
| 430 } else { |
| 431 maybe_res = isolate->factory()->NewRawTwoByteString(result_len); |
| 432 } |
| 433 Handle<SeqString> untyped_res; |
| 434 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); |
| 435 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); |
| 436 |
| 437 for (int i = 0; i < matches; i++) { |
| 438 // Copy non-matched subject content. |
| 439 if (subject_pos < indices.at(i)) { |
| 440 String::WriteToFlat(*subject, result->GetChars() + result_pos, |
| 441 subject_pos, indices.at(i)); |
| 442 result_pos += indices.at(i) - subject_pos; |
| 443 } |
| 444 |
| 445 // Replace match. |
| 446 if (replacement_len > 0) { |
| 447 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, |
| 448 replacement_len); |
| 449 result_pos += replacement_len; |
| 450 } |
| 451 |
| 452 subject_pos = indices.at(i) + pattern_len; |
| 453 } |
| 454 // Add remaining subject content at the end. |
| 455 if (subject_pos < subject_len) { |
| 456 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, |
| 457 subject_len); |
| 458 } |
| 459 |
| 460 int32_t match_indices[] = {indices.at(matches - 1), |
| 461 indices.at(matches - 1) + pattern_len}; |
| 462 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); |
| 463 |
| 464 return *result; |
| 465 } |
| 466 |
| 467 |
| 468 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( |
| 469 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
| 470 Handle<String> replacement, Handle<JSArray> last_match_info) { |
| 471 DCHECK(subject->IsFlat()); |
| 472 DCHECK(replacement->IsFlat()); |
| 473 |
| 474 int capture_count = regexp->CaptureCount(); |
| 475 int subject_length = subject->length(); |
| 476 |
| 477 // CompiledReplacement uses zone allocation. |
| 478 ZoneScope zone_scope(isolate->runtime_zone()); |
| 479 CompiledReplacement compiled_replacement(zone_scope.zone()); |
| 480 bool simple_replace = |
| 481 compiled_replacement.Compile(replacement, capture_count, subject_length); |
| 482 |
| 483 // Shortcut for simple non-regexp global replacements |
| 484 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { |
| 485 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { |
| 486 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
| 487 isolate, subject, regexp, replacement, last_match_info); |
| 488 } else { |
| 489 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
| 490 isolate, subject, regexp, replacement, last_match_info); |
| 491 } |
| 492 } |
| 493 |
| 494 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| 495 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 496 |
| 497 int32_t* current_match = global_cache.FetchNext(); |
| 498 if (current_match == NULL) { |
| 499 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 500 return *subject; |
| 501 } |
| 502 |
| 503 // Guessing the number of parts that the final result string is built |
| 504 // from. Global regexps can match any number of times, so we guess |
| 505 // conservatively. |
| 506 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; |
| 507 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); |
| 508 |
| 509 // Number of parts added by compiled replacement plus preceeding |
| 510 // string and possibly suffix after last match. It is possible for |
| 511 // all components to use two elements when encoded as two smis. |
| 512 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); |
| 513 |
| 514 int prev = 0; |
| 515 |
| 516 do { |
| 517 builder.EnsureCapacity(parts_added_per_loop); |
| 518 |
| 519 int start = current_match[0]; |
| 520 int end = current_match[1]; |
| 521 |
| 522 if (prev < start) { |
| 523 builder.AddSubjectSlice(prev, start); |
| 524 } |
| 525 |
| 526 if (simple_replace) { |
| 527 builder.AddString(replacement); |
| 528 } else { |
| 529 compiled_replacement.Apply(&builder, start, end, current_match); |
| 530 } |
| 531 prev = end; |
| 532 |
| 533 current_match = global_cache.FetchNext(); |
| 534 } while (current_match != NULL); |
| 535 |
| 536 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 537 |
| 538 if (prev < subject_length) { |
| 539 builder.EnsureCapacity(2); |
| 540 builder.AddSubjectSlice(prev, subject_length); |
| 541 } |
| 542 |
| 543 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
| 544 global_cache.LastSuccessfulMatch()); |
| 545 |
| 546 Handle<String> result; |
| 547 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString()); |
| 548 return *result; |
| 549 } |
| 550 |
| 551 |
| 552 template <typename ResultSeqString> |
| 553 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString( |
| 554 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
| 555 Handle<JSArray> last_match_info) { |
| 556 DCHECK(subject->IsFlat()); |
| 557 |
| 558 // Shortcut for simple non-regexp global replacements |
| 559 if (regexp->TypeTag() == JSRegExp::ATOM) { |
| 560 Handle<String> empty_string = isolate->factory()->empty_string(); |
| 561 if (subject->IsOneByteRepresentation()) { |
| 562 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
| 563 isolate, subject, regexp, empty_string, last_match_info); |
| 564 } else { |
| 565 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
| 566 isolate, subject, regexp, empty_string, last_match_info); |
| 567 } |
| 568 } |
| 569 |
| 570 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| 571 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 572 |
| 573 int32_t* current_match = global_cache.FetchNext(); |
| 574 if (current_match == NULL) { |
| 575 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 576 return *subject; |
| 577 } |
| 578 |
| 579 int start = current_match[0]; |
| 580 int end = current_match[1]; |
| 581 int capture_count = regexp->CaptureCount(); |
| 582 int subject_length = subject->length(); |
| 583 |
| 584 int new_length = subject_length - (end - start); |
| 585 if (new_length == 0) return isolate->heap()->empty_string(); |
| 586 |
| 587 Handle<ResultSeqString> answer; |
| 588 if (ResultSeqString::kHasOneByteEncoding) { |
| 589 answer = Handle<ResultSeqString>::cast( |
| 590 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked()); |
| 591 } else { |
| 592 answer = Handle<ResultSeqString>::cast( |
| 593 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked()); |
| 594 } |
| 595 |
| 596 int prev = 0; |
| 597 int position = 0; |
| 598 |
| 599 do { |
| 600 start = current_match[0]; |
| 601 end = current_match[1]; |
| 602 if (prev < start) { |
| 603 // Add substring subject[prev;start] to answer string. |
| 604 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start); |
| 605 position += start - prev; |
| 606 } |
| 607 prev = end; |
| 608 |
| 609 current_match = global_cache.FetchNext(); |
| 610 } while (current_match != NULL); |
| 611 |
| 612 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 613 |
| 614 RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count, |
| 615 global_cache.LastSuccessfulMatch()); |
| 616 |
| 617 if (prev < subject_length) { |
| 618 // Add substring subject[prev;length] to answer string. |
| 619 String::WriteToFlat(*subject, answer->GetChars() + position, prev, |
| 620 subject_length); |
| 621 position += subject_length - prev; |
| 622 } |
| 623 |
| 624 if (position == 0) return isolate->heap()->empty_string(); |
| 625 |
| 626 // Shorten string and fill |
| 627 int string_size = ResultSeqString::SizeFor(position); |
| 628 int allocated_string_size = ResultSeqString::SizeFor(new_length); |
| 629 int delta = allocated_string_size - string_size; |
| 630 |
| 631 answer->set_length(position); |
| 632 if (delta == 0) return *answer; |
| 633 |
| 634 Address end_of_string = answer->address() + string_size; |
| 635 Heap* heap = isolate->heap(); |
| 636 |
| 637 // The trimming is performed on a newly allocated object, which is on a |
| 638 // fresly allocated page or on an already swept page. Hence, the sweeper |
| 639 // thread can not get confused with the filler creation. No synchronization |
| 640 // needed. |
| 641 heap->CreateFillerObjectAt(end_of_string, delta); |
| 642 heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR); |
| 643 return *answer; |
| 644 } |
| 645 |
| 646 |
| 647 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) { |
| 648 HandleScope scope(isolate); |
| 649 DCHECK(args.length() == 4); |
| 650 |
| 651 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
| 652 CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); |
| 653 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); |
| 654 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
| 655 |
| 656 RUNTIME_ASSERT(regexp->GetFlags().is_global()); |
| 657 RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); |
| 658 |
| 659 subject = String::Flatten(subject); |
| 660 |
| 661 if (replacement->length() == 0) { |
| 662 if (subject->HasOnlyOneByteChars()) { |
| 663 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( |
| 664 isolate, subject, regexp, last_match_info); |
| 665 } else { |
| 666 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( |
| 667 isolate, subject, regexp, last_match_info); |
| 668 } |
| 669 } |
| 670 |
| 671 replacement = String::Flatten(replacement); |
| 672 |
| 673 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp, |
| 674 replacement, last_match_info); |
| 675 } |
| 676 |
| 677 |
| 678 RUNTIME_FUNCTION(Runtime_StringSplit) { |
| 679 HandleScope handle_scope(isolate); |
| 680 DCHECK(args.length() == 3); |
| 681 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
| 682 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); |
| 683 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); |
| 684 RUNTIME_ASSERT(limit > 0); |
| 685 |
| 686 int subject_length = subject->length(); |
| 687 int pattern_length = pattern->length(); |
| 688 RUNTIME_ASSERT(pattern_length > 0); |
| 689 |
| 690 if (limit == 0xffffffffu) { |
| 691 Handle<Object> cached_answer( |
| 692 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern, |
| 693 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS), |
| 694 isolate); |
| 695 if (*cached_answer != Smi::FromInt(0)) { |
| 696 // The cache FixedArray is a COW-array and can therefore be reused. |
| 697 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements( |
| 698 Handle<FixedArray>::cast(cached_answer)); |
| 699 return *result; |
| 700 } |
| 701 } |
| 702 |
| 703 // The limit can be very large (0xffffffffu), but since the pattern |
| 704 // isn't empty, we can never create more parts than ~half the length |
| 705 // of the subject. |
| 706 |
| 707 subject = String::Flatten(subject); |
| 708 pattern = String::Flatten(pattern); |
| 709 |
| 710 static const int kMaxInitialListCapacity = 16; |
| 711 |
| 712 ZoneScope zone_scope(isolate->runtime_zone()); |
| 713 |
| 714 // Find (up to limit) indices of separator and end-of-string in subject |
| 715 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
| 716 ZoneList<int> indices(initial_capacity, zone_scope.zone()); |
| 717 |
| 718 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit, |
| 719 zone_scope.zone()); |
| 720 |
| 721 if (static_cast<uint32_t>(indices.length()) < limit) { |
| 722 indices.Add(subject_length, zone_scope.zone()); |
| 723 } |
| 724 |
| 725 // The list indices now contains the end of each part to create. |
| 726 |
| 727 // Create JSArray of substrings separated by separator. |
| 728 int part_count = indices.length(); |
| 729 |
| 730 Handle<JSArray> result = isolate->factory()->NewJSArray(part_count); |
| 731 JSObject::EnsureCanContainHeapObjectElements(result); |
| 732 result->set_length(Smi::FromInt(part_count)); |
| 733 |
| 734 DCHECK(result->HasFastObjectElements()); |
| 735 |
| 736 if (part_count == 1 && indices.at(0) == subject_length) { |
| 737 FixedArray::cast(result->elements())->set(0, *subject); |
| 738 return *result; |
| 739 } |
| 740 |
| 741 Handle<FixedArray> elements(FixedArray::cast(result->elements())); |
| 742 int part_start = 0; |
| 743 for (int i = 0; i < part_count; i++) { |
| 744 HandleScope local_loop_handle(isolate); |
| 745 int part_end = indices.at(i); |
| 746 Handle<String> substring = |
| 747 isolate->factory()->NewProperSubString(subject, part_start, part_end); |
| 748 elements->set(i, *substring); |
| 749 part_start = part_end + pattern_length; |
| 750 } |
| 751 |
| 752 if (limit == 0xffffffffu) { |
| 753 if (result->HasFastObjectElements()) { |
| 754 RegExpResultsCache::Enter(isolate, subject, pattern, elements, |
| 755 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); |
| 756 } |
| 757 } |
| 758 |
| 759 return *result; |
| 760 } |
| 761 |
| 762 |
| 763 RUNTIME_FUNCTION(Runtime_RegExpCompile) { |
| 764 HandleScope scope(isolate); |
| 765 DCHECK(args.length() == 3); |
| 766 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, re, 0); |
| 767 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1); |
| 768 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2); |
| 769 Handle<Object> result; |
| 770 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, |
| 771 RegExpImpl::Compile(re, pattern, flags)); |
| 772 return *result; |
| 773 } |
| 774 |
| 775 |
| 776 RUNTIME_FUNCTION(Runtime_RegExpExecRT) { |
| 777 HandleScope scope(isolate); |
| 778 DCHECK(args.length() == 4); |
| 779 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 780 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
| 781 CONVERT_INT32_ARG_CHECKED(index, 2); |
| 782 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); |
| 783 // Due to the way the JS calls are constructed this must be less than the |
| 784 // length of a string, i.e. it is always a Smi. We check anyway for security. |
| 785 RUNTIME_ASSERT(index >= 0); |
| 786 RUNTIME_ASSERT(index <= subject->length()); |
| 787 isolate->counters()->regexp_entry_runtime()->Increment(); |
| 788 Handle<Object> result; |
| 789 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 790 isolate, result, |
| 791 RegExpImpl::Exec(regexp, subject, index, last_match_info)); |
| 792 return *result; |
| 793 } |
| 794 |
| 795 |
| 796 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) { |
| 797 HandleScope handle_scope(isolate); |
| 798 DCHECK(args.length() == 3); |
| 799 CONVERT_SMI_ARG_CHECKED(size, 0); |
| 800 RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength); |
| 801 CONVERT_ARG_HANDLE_CHECKED(Object, index, 1); |
| 802 CONVERT_ARG_HANDLE_CHECKED(Object, input, 2); |
| 803 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size); |
| 804 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map()); |
| 805 Handle<JSObject> object = |
| 806 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false); |
| 807 Handle<JSArray> array = Handle<JSArray>::cast(object); |
| 808 array->set_elements(*elements); |
| 809 array->set_length(Smi::FromInt(size)); |
| 810 // Write in-object properties after the length of the array. |
| 811 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index); |
| 812 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input); |
| 813 return *array; |
| 814 } |
| 815 |
| 816 |
| 817 RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) { |
| 818 HandleScope scope(isolate); |
| 819 DCHECK(args.length() == 6); |
| 820 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 821 CONVERT_ARG_HANDLE_CHECKED(String, source, 1); |
| 822 // If source is the empty string we set it to "(?:)" instead as |
| 823 // suggested by ECMA-262, 5th, section 15.10.4.1. |
| 824 if (source->length() == 0) source = isolate->factory()->query_colon_string(); |
| 825 |
| 826 CONVERT_ARG_HANDLE_CHECKED(Object, global, 2); |
| 827 if (!global->IsTrue()) global = isolate->factory()->false_value(); |
| 828 |
| 829 CONVERT_ARG_HANDLE_CHECKED(Object, ignoreCase, 3); |
| 830 if (!ignoreCase->IsTrue()) ignoreCase = isolate->factory()->false_value(); |
| 831 |
| 832 CONVERT_ARG_HANDLE_CHECKED(Object, multiline, 4); |
| 833 if (!multiline->IsTrue()) multiline = isolate->factory()->false_value(); |
| 834 |
| 835 CONVERT_ARG_HANDLE_CHECKED(Object, sticky, 5); |
| 836 if (!sticky->IsTrue()) sticky = isolate->factory()->false_value(); |
| 837 |
| 838 Map* map = regexp->map(); |
| 839 Object* constructor = map->constructor(); |
| 840 if (!FLAG_harmony_regexps && constructor->IsJSFunction() && |
| 841 JSFunction::cast(constructor)->initial_map() == map) { |
| 842 // If we still have the original map, set in-object properties directly. |
| 843 regexp->InObjectPropertyAtPut(JSRegExp::kSourceFieldIndex, *source); |
| 844 // Both true and false are immovable immortal objects so no need for write |
| 845 // barrier. |
| 846 regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global, |
| 847 SKIP_WRITE_BARRIER); |
| 848 regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignoreCase, |
| 849 SKIP_WRITE_BARRIER); |
| 850 regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline, |
| 851 SKIP_WRITE_BARRIER); |
| 852 regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex, |
| 853 Smi::FromInt(0), SKIP_WRITE_BARRIER); |
| 854 return *regexp; |
| 855 } |
| 856 |
| 857 // Map has changed, so use generic, but slower, method. We also end here if |
| 858 // the --harmony-regexp flag is set, because the initial map does not have |
| 859 // space for the 'sticky' flag, since it is from the snapshot, but must work |
| 860 // both with and without --harmony-regexp. When sticky comes out from under |
| 861 // the flag, we will be able to use the fast initial map. |
| 862 PropertyAttributes final = |
| 863 static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE); |
| 864 PropertyAttributes writable = |
| 865 static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE); |
| 866 Handle<Object> zero(Smi::FromInt(0), isolate); |
| 867 Factory* factory = isolate->factory(); |
| 868 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->source_string(), |
| 869 source, final).Check(); |
| 870 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(), |
| 871 global, final).Check(); |
| 872 JSObject::SetOwnPropertyIgnoreAttributes( |
| 873 regexp, factory->ignore_case_string(), ignoreCase, final).Check(); |
| 874 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->multiline_string(), |
| 875 multiline, final).Check(); |
| 876 if (FLAG_harmony_regexps) { |
| 877 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(), |
| 878 sticky, final).Check(); |
| 879 } |
| 880 JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->last_index_string(), |
| 881 zero, writable).Check(); |
| 882 return *regexp; |
| 883 } |
| 884 |
| 885 |
| 886 RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) { |
| 887 HandleScope scope(isolate); |
| 888 DCHECK(args.length() == 4); |
| 889 CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0); |
| 890 CONVERT_SMI_ARG_CHECKED(index, 1); |
| 891 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2); |
| 892 CONVERT_ARG_HANDLE_CHECKED(String, flags, 3); |
| 893 |
| 894 // Get the RegExp function from the context in the literals array. |
| 895 // This is the RegExp function from the context in which the |
| 896 // function was created. We do not use the RegExp function from the |
| 897 // current native context because this might be the RegExp function |
| 898 // from another context which we should not have access to. |
| 899 Handle<JSFunction> constructor = Handle<JSFunction>( |
| 900 JSFunction::NativeContextFromLiterals(*literals)->regexp_function()); |
| 901 // Compute the regular expression literal. |
| 902 Handle<Object> regexp; |
| 903 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 904 isolate, regexp, |
| 905 RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags)); |
| 906 literals->set(index, *regexp); |
| 907 return *regexp; |
| 908 } |
| 909 |
| 910 |
| 911 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain |
| 912 // separate last match info. See comment on that function. |
| 913 template <bool has_capture> |
| 914 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, |
| 915 Handle<JSRegExp> regexp, |
| 916 Handle<JSArray> last_match_array, |
| 917 Handle<JSArray> result_array) { |
| 918 DCHECK(subject->IsFlat()); |
| 919 DCHECK_NE(has_capture, regexp->CaptureCount() == 0); |
| 920 |
| 921 int capture_count = regexp->CaptureCount(); |
| 922 int subject_length = subject->length(); |
| 923 |
| 924 static const int kMinLengthToCache = 0x1000; |
| 925 |
| 926 if (subject_length > kMinLengthToCache) { |
| 927 Handle<Object> cached_answer( |
| 928 RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(), |
| 929 RegExpResultsCache::REGEXP_MULTIPLE_INDICES), |
| 930 isolate); |
| 931 if (*cached_answer != Smi::FromInt(0)) { |
| 932 Handle<FixedArray> cached_fixed_array = |
| 933 Handle<FixedArray>(FixedArray::cast(*cached_answer)); |
| 934 // The cache FixedArray is a COW-array and can therefore be reused. |
| 935 JSArray::SetContent(result_array, cached_fixed_array); |
| 936 // The actual length of the result array is stored in the last element of |
| 937 // the backing store (the backing FixedArray may have a larger capacity). |
| 938 Object* cached_fixed_array_last_element = |
| 939 cached_fixed_array->get(cached_fixed_array->length() - 1); |
| 940 Smi* js_array_length = Smi::cast(cached_fixed_array_last_element); |
| 941 result_array->set_length(js_array_length); |
| 942 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
| 943 NULL); |
| 944 return *result_array; |
| 945 } |
| 946 } |
| 947 |
| 948 RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); |
| 949 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 950 |
| 951 // Ensured in Runtime_RegExpExecMultiple. |
| 952 DCHECK(result_array->HasFastObjectElements()); |
| 953 Handle<FixedArray> result_elements( |
| 954 FixedArray::cast(result_array->elements())); |
| 955 if (result_elements->length() < 16) { |
| 956 result_elements = isolate->factory()->NewFixedArrayWithHoles(16); |
| 957 } |
| 958 |
| 959 FixedArrayBuilder builder(result_elements); |
| 960 |
| 961 // Position to search from. |
| 962 int match_start = -1; |
| 963 int match_end = 0; |
| 964 bool first = true; |
| 965 |
| 966 // Two smis before and after the match, for very long strings. |
| 967 static const int kMaxBuilderEntriesPerRegExpMatch = 5; |
| 968 |
| 969 while (true) { |
| 970 int32_t* current_match = global_cache.FetchNext(); |
| 971 if (current_match == NULL) break; |
| 972 match_start = current_match[0]; |
| 973 builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| 974 if (match_end < match_start) { |
| 975 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
| 976 match_start); |
| 977 } |
| 978 match_end = current_match[1]; |
| 979 { |
| 980 // Avoid accumulating new handles inside loop. |
| 981 HandleScope temp_scope(isolate); |
| 982 Handle<String> match; |
| 983 if (!first) { |
| 984 match = isolate->factory()->NewProperSubString(subject, match_start, |
| 985 match_end); |
| 986 } else { |
| 987 match = |
| 988 isolate->factory()->NewSubString(subject, match_start, match_end); |
| 989 first = false; |
| 990 } |
| 991 |
| 992 if (has_capture) { |
| 993 // Arguments array to replace function is match, captures, index and |
| 994 // subject, i.e., 3 + capture count in total. |
| 995 Handle<FixedArray> elements = |
| 996 isolate->factory()->NewFixedArray(3 + capture_count); |
| 997 |
| 998 elements->set(0, *match); |
| 999 for (int i = 1; i <= capture_count; i++) { |
| 1000 int start = current_match[i * 2]; |
| 1001 if (start >= 0) { |
| 1002 int end = current_match[i * 2 + 1]; |
| 1003 DCHECK(start <= end); |
| 1004 Handle<String> substring = |
| 1005 isolate->factory()->NewSubString(subject, start, end); |
| 1006 elements->set(i, *substring); |
| 1007 } else { |
| 1008 DCHECK(current_match[i * 2 + 1] < 0); |
| 1009 elements->set(i, isolate->heap()->undefined_value()); |
| 1010 } |
| 1011 } |
| 1012 elements->set(capture_count + 1, Smi::FromInt(match_start)); |
| 1013 elements->set(capture_count + 2, *subject); |
| 1014 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); |
| 1015 } else { |
| 1016 builder.Add(*match); |
| 1017 } |
| 1018 } |
| 1019 } |
| 1020 |
| 1021 if (global_cache.HasException()) return isolate->heap()->exception(); |
| 1022 |
| 1023 if (match_start >= 0) { |
| 1024 // Finished matching, with at least one match. |
| 1025 if (match_end < subject_length) { |
| 1026 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, |
| 1027 subject_length); |
| 1028 } |
| 1029 |
| 1030 RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count, |
| 1031 NULL); |
| 1032 |
| 1033 if (subject_length > kMinLengthToCache) { |
| 1034 // Store the length of the result array into the last element of the |
| 1035 // backing FixedArray. |
| 1036 builder.EnsureCapacity(1); |
| 1037 Handle<FixedArray> fixed_array = builder.array(); |
| 1038 fixed_array->set(fixed_array->length() - 1, |
| 1039 Smi::FromInt(builder.length())); |
| 1040 // Cache the result and turn the FixedArray into a COW array. |
| 1041 RegExpResultsCache::Enter(isolate, subject, |
| 1042 handle(regexp->data(), isolate), fixed_array, |
| 1043 RegExpResultsCache::REGEXP_MULTIPLE_INDICES); |
| 1044 } |
| 1045 return *builder.ToJSArray(result_array); |
| 1046 } else { |
| 1047 return isolate->heap()->null_value(); // No matches at all. |
| 1048 } |
| 1049 } |
| 1050 |
| 1051 |
| 1052 // This is only called for StringReplaceGlobalRegExpWithFunction. This sets |
| 1053 // lastMatchInfoOverride to maintain the last match info, so we don't need to |
| 1054 // set any other last match array info. |
| 1055 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { |
| 1056 HandleScope handles(isolate); |
| 1057 DCHECK(args.length() == 4); |
| 1058 |
| 1059 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
| 1060 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
| 1061 CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); |
| 1062 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); |
| 1063 RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); |
| 1064 RUNTIME_ASSERT(result_array->HasFastObjectElements()); |
| 1065 |
| 1066 subject = String::Flatten(subject); |
| 1067 RUNTIME_ASSERT(regexp->GetFlags().is_global()); |
| 1068 |
| 1069 if (regexp->CaptureCount() == 0) { |
| 1070 return SearchRegExpMultiple<false>(isolate, subject, regexp, |
| 1071 last_match_info, result_array); |
| 1072 } else { |
| 1073 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info, |
| 1074 result_array); |
| 1075 } |
| 1076 } |
| 1077 |
| 1078 |
| 1079 RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) { |
| 1080 SealHandleScope shs(isolate); |
| 1081 return __RT_impl_Runtime_RegExpConstructResult(args, isolate); |
| 1082 } |
| 1083 |
| 1084 |
| 1085 RUNTIME_FUNCTION(RuntimeReference_RegExpExec) { |
| 1086 SealHandleScope shs(isolate); |
| 1087 return __RT_impl_Runtime_RegExpExecRT(args, isolate); |
| 1088 } |
| 1089 |
| 1090 |
| 1091 // Perform string match of pattern on subject, starting at start index. |
| 1092 // Caller must ensure that 0 <= start_index <= sub->length(), |
| 1093 // and should check that pat->length() + start_index <= sub->length(). |
| 1094 int Runtime::StringMatch(Isolate* isolate, Handle<String> sub, |
| 1095 Handle<String> pat, int start_index) { |
| 1096 DCHECK(0 <= start_index); |
| 1097 DCHECK(start_index <= sub->length()); |
| 1098 |
| 1099 int pattern_length = pat->length(); |
| 1100 if (pattern_length == 0) return start_index; |
| 1101 |
| 1102 int subject_length = sub->length(); |
| 1103 if (start_index + pattern_length > subject_length) return -1; |
| 1104 |
| 1105 sub = String::Flatten(sub); |
| 1106 pat = String::Flatten(pat); |
| 1107 |
| 1108 DisallowHeapAllocation no_gc; // ensure vectors stay valid |
| 1109 // Extract flattened substrings of cons strings before getting encoding. |
| 1110 String::FlatContent seq_sub = sub->GetFlatContent(); |
| 1111 String::FlatContent seq_pat = pat->GetFlatContent(); |
| 1112 |
| 1113 // dispatch on type of strings |
| 1114 if (seq_pat.IsOneByte()) { |
| 1115 Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector(); |
| 1116 if (seq_sub.IsOneByte()) { |
| 1117 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector, |
| 1118 start_index); |
| 1119 } |
| 1120 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, |
| 1121 start_index); |
| 1122 } |
| 1123 Vector<const uc16> pat_vector = seq_pat.ToUC16Vector(); |
| 1124 if (seq_sub.IsOneByte()) { |
| 1125 return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector, |
| 1126 start_index); |
| 1127 } |
| 1128 return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, start_index); |
| 1129 } |
| 1130 } |
| 1131 } // namespace v8::internal |
OLD | NEW |