OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
229 Handle<String> subject, | 229 Handle<String> subject, |
230 int index, | 230 int index, |
231 Handle<JSArray> last_match_info) { | 231 Handle<JSArray> last_match_info) { |
232 Isolate* isolate = re->GetIsolate(); | 232 Isolate* isolate = re->GetIsolate(); |
233 | 233 |
234 ASSERT(0 <= index); | 234 ASSERT(0 <= index); |
235 ASSERT(index <= subject->length()); | 235 ASSERT(index <= subject->length()); |
236 | 236 |
237 if (!subject->IsFlat()) FlattenString(subject); | 237 if (!subject->IsFlat()) FlattenString(subject); |
238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 238 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
239 // Extract flattened substrings of cons strings before determining asciiness. | |
240 String* seq_sub = *subject; | |
241 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); | |
242 | 239 |
243 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); | 240 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
Vitaly Repeshko
2011/08/19 16:27:40
We don't call needle->IsAsciiRepresentationUnderne
| |
244 int needle_len = needle->length(); | 241 int needle_len = needle->length(); |
245 | 242 |
246 if (needle_len != 0) { | 243 if (needle_len != 0) { |
247 if (index + needle_len > subject->length()) | 244 if (index + needle_len > subject->length()) |
248 return isolate->factory()->null_value(); | 245 return isolate->factory()->null_value(); |
249 | 246 |
250 // dispatch on type of strings | 247 // dispatch on type of strings |
251 index = (needle->IsAsciiRepresentation() | 248 index = (needle->IsAsciiRepresentation() |
252 ? (seq_sub->IsAsciiRepresentation() | 249 ? (subject->IsAsciiRepresentationUnderneath() |
253 ? SearchString(isolate, | 250 ? SearchString(isolate, |
254 seq_sub->ToAsciiVector(), | 251 subject->ToAsciiVector(), |
255 needle->ToAsciiVector(), | 252 needle->ToAsciiVector(), |
256 index) | 253 index) |
257 : SearchString(isolate, | 254 : SearchString(isolate, |
258 seq_sub->ToUC16Vector(), | 255 subject->ToUC16Vector(), |
259 needle->ToAsciiVector(), | 256 needle->ToAsciiVector(), |
260 index)) | 257 index)) |
261 : (seq_sub->IsAsciiRepresentation() | 258 : (subject->IsAsciiRepresentationUnderneath() |
262 ? SearchString(isolate, | 259 ? SearchString(isolate, |
263 seq_sub->ToAsciiVector(), | 260 subject->ToAsciiVector(), |
264 needle->ToUC16Vector(), | 261 needle->ToUC16Vector(), |
265 index) | 262 index) |
266 : SearchString(isolate, | 263 : SearchString(isolate, |
267 seq_sub->ToUC16Vector(), | 264 subject->ToUC16Vector(), |
268 needle->ToUC16Vector(), | 265 needle->ToUC16Vector(), |
269 index))); | 266 index))); |
270 if (index == -1) return FACTORY->null_value(); | 267 if (index == -1) return FACTORY->null_value(); |
271 } | 268 } |
272 ASSERT(last_match_info->HasFastElements()); | 269 ASSERT(last_match_info->HasFastElements()); |
273 | 270 |
274 { | 271 { |
275 NoHandleAllocation no_handles; | 272 NoHandleAllocation no_handles; |
276 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 273 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
277 SetAtomLastCapture(array, *subject, index, index + needle_len); | 274 SetAtomLastCapture(array, *subject, index, index + needle_len); |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
348 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); | 345 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); |
349 ASSERT(error_string->IsString()); | 346 ASSERT(error_string->IsString()); |
350 Handle<String> error_message(String::cast(error_string)); | 347 Handle<String> error_message(String::cast(error_string)); |
351 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); | 348 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); |
352 return false; | 349 return false; |
353 } | 350 } |
354 | 351 |
355 JSRegExp::Flags flags = re->GetFlags(); | 352 JSRegExp::Flags flags = re->GetFlags(); |
356 | 353 |
357 Handle<String> pattern(re->Pattern()); | 354 Handle<String> pattern(re->Pattern()); |
358 if (!pattern->IsFlat()) { | 355 if (!pattern->IsFlat()) FlattenString(pattern); |
359 FlattenString(pattern); | |
360 } | |
361 | |
362 RegExpCompileData compile_data; | 356 RegExpCompileData compile_data; |
363 FlatStringReader reader(isolate, pattern); | 357 FlatStringReader reader(isolate, pattern); |
364 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 358 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
365 &compile_data)) { | 359 &compile_data)) { |
366 // Throw an exception if we fail to parse the pattern. | 360 // Throw an exception if we fail to parse the pattern. |
367 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. | 361 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. |
368 ThrowRegExpException(re, | 362 ThrowRegExpException(re, |
369 pattern, | 363 pattern, |
370 compile_data.error, | 364 compile_data.error, |
371 "malformed_regexp"); | 365 "malformed_regexp"); |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
435 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, | 429 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, |
436 JSRegExp::IRREGEXP, | 430 JSRegExp::IRREGEXP, |
437 pattern, | 431 pattern, |
438 flags, | 432 flags, |
439 capture_count); | 433 capture_count); |
440 } | 434 } |
441 | 435 |
442 | 436 |
443 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 437 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
444 Handle<String> subject) { | 438 Handle<String> subject) { |
445 if (!subject->IsFlat()) { | 439 if (!subject->IsFlat()) FlattenString(subject); |
446 FlattenString(subject); | 440 |
447 } | |
448 // Check the asciiness of the underlying storage. | 441 // Check the asciiness of the underlying storage. |
449 bool is_ascii; | 442 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
450 { | 443 if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1; |
451 AssertNoAllocation no_gc; | 444 |
452 String* sequential_string = *subject; | |
453 if (subject->IsConsString()) { | |
454 sequential_string = ConsString::cast(*subject)->first(); | |
455 } | |
456 is_ascii = sequential_string->IsAsciiRepresentation(); | |
457 } | |
458 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | |
459 return -1; | |
460 } | |
461 #ifdef V8_INTERPRETED_REGEXP | 445 #ifdef V8_INTERPRETED_REGEXP |
462 // Byte-code regexp needs space allocated for all its registers. | 446 // Byte-code regexp needs space allocated for all its registers. |
463 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); | 447 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
464 #else // V8_INTERPRETED_REGEXP | 448 #else // V8_INTERPRETED_REGEXP |
465 // Native regexp only needs room to output captures. Registers are handled | 449 // Native regexp only needs room to output captures. Registers are handled |
466 // internally. | 450 // internally. |
467 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 451 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
468 #endif // V8_INTERPRETED_REGEXP | 452 #endif // V8_INTERPRETED_REGEXP |
469 } | 453 } |
470 | 454 |
471 | 455 |
472 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( | 456 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( |
473 Handle<JSRegExp> regexp, | 457 Handle<JSRegExp> regexp, |
474 Handle<String> subject, | 458 Handle<String> subject, |
475 int index, | 459 int index, |
476 Vector<int> output) { | 460 Vector<int> output) { |
477 Isolate* isolate = regexp->GetIsolate(); | 461 Isolate* isolate = regexp->GetIsolate(); |
478 | 462 |
479 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); | 463 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
480 | 464 |
481 ASSERT(index >= 0); | 465 ASSERT(index >= 0); |
482 ASSERT(index <= subject->length()); | 466 ASSERT(index <= subject->length()); |
483 ASSERT(subject->IsFlat()); | 467 ASSERT(subject->IsFlat()); |
484 | 468 |
485 // A flat ASCII string might have a two-byte first part. | 469 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
486 if (subject->IsConsString()) { | |
487 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); | |
488 } | |
489 | 470 |
490 #ifndef V8_INTERPRETED_REGEXP | 471 #ifndef V8_INTERPRETED_REGEXP |
491 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 472 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
492 do { | 473 do { |
493 bool is_ascii = subject->IsAsciiRepresentation(); | |
494 EnsureCompiledIrregexp(regexp, is_ascii); | 474 EnsureCompiledIrregexp(regexp, is_ascii); |
495 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 475 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
496 NativeRegExpMacroAssembler::Result res = | 476 NativeRegExpMacroAssembler::Result res = |
497 NativeRegExpMacroAssembler::Match(code, | 477 NativeRegExpMacroAssembler::Match(code, |
498 subject, | 478 subject, |
499 output.start(), | 479 output.start(), |
500 output.length(), | 480 output.length(), |
501 index, | 481 index, |
502 isolate); | 482 isolate); |
503 if (res != NativeRegExpMacroAssembler::RETRY) { | 483 if (res != NativeRegExpMacroAssembler::RETRY) { |
504 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 484 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
505 isolate->has_pending_exception()); | 485 isolate->has_pending_exception()); |
506 STATIC_ASSERT( | 486 STATIC_ASSERT( |
507 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 487 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
508 STATIC_ASSERT( | 488 STATIC_ASSERT( |
509 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 489 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
510 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 490 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
511 == RE_EXCEPTION); | 491 == RE_EXCEPTION); |
512 return static_cast<IrregexpResult>(res); | 492 return static_cast<IrregexpResult>(res); |
513 } | 493 } |
514 // If result is RETRY, the string has changed representation, and we | 494 // If result is RETRY, the string has changed representation, and we |
515 // must restart from scratch. | 495 // must restart from scratch. |
516 // In this case, it means we must make sure we are prepared to handle | 496 // In this case, it means we must make sure we are prepared to handle |
517 // the, potentially, different subject (the string can switch between | 497 // the, potentially, different subject (the string can switch between |
518 // being internal and external, and even between being ASCII and UC16, | 498 // being internal and external, and even between being ASCII and UC16, |
519 // but the characters are always the same). | 499 // but the characters are always the same). |
520 IrregexpPrepare(regexp, subject); | 500 IrregexpPrepare(regexp, subject); |
501 is_ascii = subject->IsAsciiRepresentationUnderneath(); | |
521 } while (true); | 502 } while (true); |
522 UNREACHABLE(); | 503 UNREACHABLE(); |
523 return RE_EXCEPTION; | 504 return RE_EXCEPTION; |
524 #else // V8_INTERPRETED_REGEXP | 505 #else // V8_INTERPRETED_REGEXP |
525 | 506 |
526 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | 507 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
527 bool is_ascii = subject->IsAsciiRepresentation(); | |
528 // We must have done EnsureCompiledIrregexp, so we can get the number of | 508 // We must have done EnsureCompiledIrregexp, so we can get the number of |
529 // registers. | 509 // registers. |
530 int* register_vector = output.start(); | 510 int* register_vector = output.start(); |
531 int number_of_capture_registers = | 511 int number_of_capture_registers = |
532 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 512 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
533 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 513 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
534 register_vector[i] = -1; | 514 register_vector[i] = -1; |
535 } | 515 } |
536 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); | 516 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
537 | 517 |
(...skipping 4819 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5357 } | 5337 } |
5358 | 5338 |
5359 return compiler.Assemble(&macro_assembler, | 5339 return compiler.Assemble(&macro_assembler, |
5360 node, | 5340 node, |
5361 data->capture_count, | 5341 data->capture_count, |
5362 pattern); | 5342 pattern); |
5363 } | 5343 } |
5364 | 5344 |
5365 | 5345 |
5366 }} // namespace v8::internal | 5346 }} // namespace v8::internal |
OLD | NEW |