| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 229 Handle<String> subject, | 229 Handle<String> subject, |
| 230 int index, | 230 int index, |
| 231 Handle<JSArray> last_match_info) { | 231 Handle<JSArray> last_match_info) { |
| 232 Isolate* isolate = re->GetIsolate(); | 232 Isolate* isolate = re->GetIsolate(); |
| 233 | 233 |
| 234 ASSERT(0 <= index); | 234 ASSERT(0 <= index); |
| 235 ASSERT(index <= subject->length()); | 235 ASSERT(index <= subject->length()); |
| 236 | 236 |
| 237 if (!subject->IsFlat()) FlattenString(subject); | 237 if (!subject->IsFlat()) FlattenString(subject); |
| 238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 239 // Extract flattened substrings of cons strings before determining asciiness. | |
| 240 String* seq_sub = *subject; | |
| 241 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); | |
| 242 | 239 |
| 243 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); | 240 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
| 241 ASSERT(StringShape(needle).IsSequential()); |
| 244 int needle_len = needle->length(); | 242 int needle_len = needle->length(); |
| 245 | 243 |
| 246 if (needle_len != 0) { | 244 if (needle_len != 0) { |
| 247 if (index + needle_len > subject->length()) | 245 if (index + needle_len > subject->length()) |
| 248 return isolate->factory()->null_value(); | 246 return isolate->factory()->null_value(); |
| 249 | 247 |
| 250 // dispatch on type of strings | 248 // dispatch on type of strings |
| 251 index = (needle->IsAsciiRepresentation() | 249 index = (needle->IsAsciiRepresentation() |
| 252 ? (seq_sub->IsAsciiRepresentation() | 250 ? (subject->IsAsciiRepresentationUnderneath() |
| 253 ? SearchString(isolate, | 251 ? SearchString(isolate, |
| 254 seq_sub->ToAsciiVector(), | 252 subject->ToAsciiVector(), |
| 255 needle->ToAsciiVector(), | 253 needle->ToAsciiVector(), |
| 256 index) | 254 index) |
| 257 : SearchString(isolate, | 255 : SearchString(isolate, |
| 258 seq_sub->ToUC16Vector(), | 256 subject->ToUC16Vector(), |
| 259 needle->ToAsciiVector(), | 257 needle->ToAsciiVector(), |
| 260 index)) | 258 index)) |
| 261 : (seq_sub->IsAsciiRepresentation() | 259 : (subject->IsAsciiRepresentationUnderneath() |
| 262 ? SearchString(isolate, | 260 ? SearchString(isolate, |
| 263 seq_sub->ToAsciiVector(), | 261 subject->ToAsciiVector(), |
| 264 needle->ToUC16Vector(), | 262 needle->ToUC16Vector(), |
| 265 index) | 263 index) |
| 266 : SearchString(isolate, | 264 : SearchString(isolate, |
| 267 seq_sub->ToUC16Vector(), | 265 subject->ToUC16Vector(), |
| 268 needle->ToUC16Vector(), | 266 needle->ToUC16Vector(), |
| 269 index))); | 267 index))); |
| 270 if (index == -1) return FACTORY->null_value(); | 268 if (index == -1) return FACTORY->null_value(); |
| 271 } | 269 } |
| 272 ASSERT(last_match_info->HasFastElements()); | 270 ASSERT(last_match_info->HasFastElements()); |
| 273 | 271 |
| 274 { | 272 { |
| 275 NoHandleAllocation no_handles; | 273 NoHandleAllocation no_handles; |
| 276 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 274 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| 277 SetAtomLastCapture(array, *subject, index, index + needle_len); | 275 SetAtomLastCapture(array, *subject, index, index + needle_len); |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 348 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); | 346 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); |
| 349 ASSERT(error_string->IsString()); | 347 ASSERT(error_string->IsString()); |
| 350 Handle<String> error_message(String::cast(error_string)); | 348 Handle<String> error_message(String::cast(error_string)); |
| 351 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); | 349 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); |
| 352 return false; | 350 return false; |
| 353 } | 351 } |
| 354 | 352 |
| 355 JSRegExp::Flags flags = re->GetFlags(); | 353 JSRegExp::Flags flags = re->GetFlags(); |
| 356 | 354 |
| 357 Handle<String> pattern(re->Pattern()); | 355 Handle<String> pattern(re->Pattern()); |
| 358 if (!pattern->IsFlat()) { | 356 if (!pattern->IsFlat()) FlattenString(pattern); |
| 359 FlattenString(pattern); | |
| 360 } | |
| 361 | |
| 362 RegExpCompileData compile_data; | 357 RegExpCompileData compile_data; |
| 363 FlatStringReader reader(isolate, pattern); | 358 FlatStringReader reader(isolate, pattern); |
| 364 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 359 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
| 365 &compile_data)) { | 360 &compile_data)) { |
| 366 // Throw an exception if we fail to parse the pattern. | 361 // Throw an exception if we fail to parse the pattern. |
| 367 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. | 362 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. |
| 368 ThrowRegExpException(re, | 363 ThrowRegExpException(re, |
| 369 pattern, | 364 pattern, |
| 370 compile_data.error, | 365 compile_data.error, |
| 371 "malformed_regexp"); | 366 "malformed_regexp"); |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 435 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, | 430 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, |
| 436 JSRegExp::IRREGEXP, | 431 JSRegExp::IRREGEXP, |
| 437 pattern, | 432 pattern, |
| 438 flags, | 433 flags, |
| 439 capture_count); | 434 capture_count); |
| 440 } | 435 } |
| 441 | 436 |
| 442 | 437 |
| 443 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 438 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| 444 Handle<String> subject) { | 439 Handle<String> subject) { |
| 445 if (!subject->IsFlat()) { | 440 if (!subject->IsFlat()) FlattenString(subject); |
| 446 FlattenString(subject); | 441 |
| 447 } | |
| 448 // Check the asciiness of the underlying storage. | 442 // Check the asciiness of the underlying storage. |
| 449 bool is_ascii; | 443 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 450 { | 444 if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1; |
| 451 AssertNoAllocation no_gc; | 445 |
| 452 String* sequential_string = *subject; | |
| 453 if (subject->IsConsString()) { | |
| 454 sequential_string = ConsString::cast(*subject)->first(); | |
| 455 } | |
| 456 is_ascii = sequential_string->IsAsciiRepresentation(); | |
| 457 } | |
| 458 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | |
| 459 return -1; | |
| 460 } | |
| 461 #ifdef V8_INTERPRETED_REGEXP | 446 #ifdef V8_INTERPRETED_REGEXP |
| 462 // Byte-code regexp needs space allocated for all its registers. | 447 // Byte-code regexp needs space allocated for all its registers. |
| 463 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); | 448 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
| 464 #else // V8_INTERPRETED_REGEXP | 449 #else // V8_INTERPRETED_REGEXP |
| 465 // Native regexp only needs room to output captures. Registers are handled | 450 // Native regexp only needs room to output captures. Registers are handled |
| 466 // internally. | 451 // internally. |
| 467 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 452 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
| 468 #endif // V8_INTERPRETED_REGEXP | 453 #endif // V8_INTERPRETED_REGEXP |
| 469 } | 454 } |
| 470 | 455 |
| 471 | 456 |
| 472 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( | 457 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( |
| 473 Handle<JSRegExp> regexp, | 458 Handle<JSRegExp> regexp, |
| 474 Handle<String> subject, | 459 Handle<String> subject, |
| 475 int index, | 460 int index, |
| 476 Vector<int> output) { | 461 Vector<int> output) { |
| 477 Isolate* isolate = regexp->GetIsolate(); | 462 Isolate* isolate = regexp->GetIsolate(); |
| 478 | 463 |
| 479 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); | 464 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
| 480 | 465 |
| 481 ASSERT(index >= 0); | 466 ASSERT(index >= 0); |
| 482 ASSERT(index <= subject->length()); | 467 ASSERT(index <= subject->length()); |
| 483 ASSERT(subject->IsFlat()); | 468 ASSERT(subject->IsFlat()); |
| 484 | 469 |
| 485 // A flat ASCII string might have a two-byte first part. | 470 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 486 if (subject->IsConsString()) { | |
| 487 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); | |
| 488 } | |
| 489 | 471 |
| 490 #ifndef V8_INTERPRETED_REGEXP | 472 #ifndef V8_INTERPRETED_REGEXP |
| 491 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 473 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| 492 do { | 474 do { |
| 493 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 494 EnsureCompiledIrregexp(regexp, is_ascii); | 475 EnsureCompiledIrregexp(regexp, is_ascii); |
| 495 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 476 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
| 496 NativeRegExpMacroAssembler::Result res = | 477 NativeRegExpMacroAssembler::Result res = |
| 497 NativeRegExpMacroAssembler::Match(code, | 478 NativeRegExpMacroAssembler::Match(code, |
| 498 subject, | 479 subject, |
| 499 output.start(), | 480 output.start(), |
| 500 output.length(), | 481 output.length(), |
| 501 index, | 482 index, |
| 502 isolate); | 483 isolate); |
| 503 if (res != NativeRegExpMacroAssembler::RETRY) { | 484 if (res != NativeRegExpMacroAssembler::RETRY) { |
| 504 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 485 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
| 505 isolate->has_pending_exception()); | 486 isolate->has_pending_exception()); |
| 506 STATIC_ASSERT( | 487 STATIC_ASSERT( |
| 507 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 488 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
| 508 STATIC_ASSERT( | 489 STATIC_ASSERT( |
| 509 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 490 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
| 510 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 491 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
| 511 == RE_EXCEPTION); | 492 == RE_EXCEPTION); |
| 512 return static_cast<IrregexpResult>(res); | 493 return static_cast<IrregexpResult>(res); |
| 513 } | 494 } |
| 514 // If result is RETRY, the string has changed representation, and we | 495 // If result is RETRY, the string has changed representation, and we |
| 515 // must restart from scratch. | 496 // must restart from scratch. |
| 516 // In this case, it means we must make sure we are prepared to handle | 497 // In this case, it means we must make sure we are prepared to handle |
| 517 // the, potentially, different subject (the string can switch between | 498 // the, potentially, different subject (the string can switch between |
| 518 // being internal and external, and even between being ASCII and UC16, | 499 // being internal and external, and even between being ASCII and UC16, |
| 519 // but the characters are always the same). | 500 // but the characters are always the same). |
| 520 IrregexpPrepare(regexp, subject); | 501 IrregexpPrepare(regexp, subject); |
| 502 is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 521 } while (true); | 503 } while (true); |
| 522 UNREACHABLE(); | 504 UNREACHABLE(); |
| 523 return RE_EXCEPTION; | 505 return RE_EXCEPTION; |
| 524 #else // V8_INTERPRETED_REGEXP | 506 #else // V8_INTERPRETED_REGEXP |
| 525 | 507 |
| 526 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | 508 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
| 527 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 528 // We must have done EnsureCompiledIrregexp, so we can get the number of | 509 // We must have done EnsureCompiledIrregexp, so we can get the number of |
| 529 // registers. | 510 // registers. |
| 530 int* register_vector = output.start(); | 511 int* register_vector = output.start(); |
| 531 int number_of_capture_registers = | 512 int number_of_capture_registers = |
| 532 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 513 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| 533 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 514 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| 534 register_vector[i] = -1; | 515 register_vector[i] = -1; |
| 535 } | 516 } |
| 536 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); | 517 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
| 537 | 518 |
| (...skipping 4819 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5357 } | 5338 } |
| 5358 | 5339 |
| 5359 return compiler.Assemble(&macro_assembler, | 5340 return compiler.Assemble(&macro_assembler, |
| 5360 node, | 5341 node, |
| 5361 data->capture_count, | 5342 data->capture_count, |
| 5362 pattern); | 5343 pattern); |
| 5363 } | 5344 } |
| 5364 | 5345 |
| 5365 | 5346 |
| 5366 }} // namespace v8::internal | 5347 }} // namespace v8::internal |
| OLD | NEW |