src/jsregexp.cc - Issue 559913002: Rename ascii to one-byte where applicable.

Side by Side Diff: src/jsregexp.cc

Issue 559913002: Rename ascii to one-byte where applicable. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/v8.h"	5 #include "src/v8.h"

6	6

7 #include "src/ast.h"	7 #include "src/ast.h"

8 #include "src/base/platform/platform.h"	8 #include "src/base/platform/platform.h"

9 #include "src/compilation-cache.h"	9 #include "src/compilation-cache.h"

10 #include "src/compiler.h"	10 #include "src/compiler.h"

(...skipping 272 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
283 if (index + needle_len > subject->length()) {	283 if (index + needle_len > subject->length()) {

284 return RegExpImpl::RE_FAILURE;	284 return RegExpImpl::RE_FAILURE;

285 }	285 }

286	286

287 for (int i = 0; i < output_size; i += 2) {	287 for (int i = 0; i < output_size; i += 2) {

288 String::FlatContent needle_content = needle->GetFlatContent();	288 String::FlatContent needle_content = needle->GetFlatContent();

289 String::FlatContent subject_content = subject->GetFlatContent();	289 String::FlatContent subject_content = subject->GetFlatContent();

290 DCHECK(needle_content.IsFlat());	290 DCHECK(needle_content.IsFlat());

291 DCHECK(subject_content.IsFlat());	291 DCHECK(subject_content.IsFlat());

292 // dispatch on type of strings	292 // dispatch on type of strings

293 index = (needle_content.IsAscii()	293 index =

294 ? (subject_content.IsAscii()	294 (needle_content.IsOneByte()

295 ? SearchString(isolate,	295 ? (subject_content.IsOneByte()

296 subject_content.ToOneByteVector(),	296 ? SearchString(isolate, subject_content.ToOneByteVector(),

297 needle_content.ToOneByteVector(),	297 needle_content.ToOneByteVector(), index)

298 index)	298 : SearchString(isolate, subject_content.ToUC16Vector(),

299 : SearchString(isolate,	299 needle_content.ToOneByteVector(), index))

300 subject_content.ToUC16Vector(),	300 : (subject_content.IsOneByte()

301 needle_content.ToOneByteVector(),	301 ? SearchString(isolate, subject_content.ToOneByteVector(),

302 index))	302 needle_content.ToUC16Vector(), index)

303 : (subject_content.IsAscii()	303 : SearchString(isolate, subject_content.ToUC16Vector(),

304 ? SearchString(isolate,	304 needle_content.ToUC16Vector(), index)));

305 subject_content.ToOneByteVector(),

306 needle_content.ToUC16Vector(),

307 index)

308 : SearchString(isolate,

309 subject_content.ToUC16Vector(),

310 needle_content.ToUC16Vector(),

311 index)));

312 if (index == -1) {	305 if (index == -1) {

313 return i / 2; // Return number of matches.	306 return i / 2; // Return number of matches.

314 } else {	307 } else {

315 output[i] = index;	308 output[i] = index;

316 output[i+1] = index + needle_len;	309 output[i+1] = index + needle_len;

317 index += needle_len;	310 index += needle_len;

318 }	311 }

319 }	312 }

320 return output_size / 2;	313 return output_size / 2;

321 }	314 }

(...skipping 17 matching lines...) Expand all Loading...
339 SealHandleScope shs(isolate);	332 SealHandleScope shs(isolate);

340 FixedArray* array = FixedArray::cast(last_match_info->elements());	333 FixedArray* array = FixedArray::cast(last_match_info->elements());

341 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);	334 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);

342 return last_match_info;	335 return last_match_info;

343 }	336 }

344	337

345	338

346 // Irregexp implementation.	339 // Irregexp implementation.

347	340

348 // Ensures that the regexp object contains a compiled version of the	341 // Ensures that the regexp object contains a compiled version of the

349 // source for either ASCII or non-ASCII strings.	342 // source for either one-byte or two-byte subject strings.

350 // If the compiled version doesn't already exist, it is compiled	343 // If the compiled version doesn't already exist, it is compiled

351 // from the source pattern.	344 // from the source pattern.

352 // If compilation fails, an exception is thrown and this function	345 // If compilation fails, an exception is thrown and this function

353 // returns false.	346 // returns false.

354 bool RegExpImpl::EnsureCompiledIrregexp(	347 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,

355 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {	348 Handle<String> sample_subject,

356 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));	349 bool is_one_byte) {

	350 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));

357 #ifdef V8_INTERPRETED_REGEXP	351 #ifdef V8_INTERPRETED_REGEXP

358 if (compiled_code->IsByteArray()) return true;	352 if (compiled_code->IsByteArray()) return true;

359 #else // V8_INTERPRETED_REGEXP (RegExp native code)	353 #else // V8_INTERPRETED_REGEXP (RegExp native code)

360 if (compiled_code->IsCode()) return true;	354 if (compiled_code->IsCode()) return true;

361 #endif	355 #endif

362 // We could potentially have marked this as flushable, but have kept	356 // We could potentially have marked this as flushable, but have kept

363 // a saved version if we did not flush it yet.	357 // a saved version if we did not flush it yet.

364 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));	358 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));

365 if (saved_code->IsCode()) {	359 if (saved_code->IsCode()) {

366 // Reinstate the code in the original place.	360 // Reinstate the code in the original place.

367 re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);	361 re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);

368 DCHECK(compiled_code->IsSmi());	362 DCHECK(compiled_code->IsSmi());

369 return true;	363 return true;

370 }	364 }

371 return CompileIrregexp(re, sample_subject, is_ascii);	365 return CompileIrregexp(re, sample_subject, is_one_byte);

372 }	366 }

373	367

374	368

375 static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, bool is_ascii,	369 static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,

376 Handle<String> error_message,	370 Handle<String> error_message,

377 Isolate* isolate) {	371 Isolate* isolate) {

378 Factory* factory = isolate->factory();	372 Factory* factory = isolate->factory();

379 Handle<FixedArray> elements = factory->NewFixedArray(2);	373 Handle<FixedArray> elements = factory->NewFixedArray(2);

380 elements->set(0, re->Pattern());	374 elements->set(0, re->Pattern());

381 elements->set(1, *error_message);	375 elements->set(1, *error_message);

382 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);	376 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);

383 Handle<Object> error;	377 Handle<Object> error;

384 MaybeHandle<Object> maybe_error =	378 MaybeHandle<Object> maybe_error =

385 factory->NewSyntaxError("malformed_regexp", array);	379 factory->NewSyntaxError("malformed_regexp", array);

386 if (maybe_error.ToHandle(&error)) isolate->Throw(*error);	380 if (maybe_error.ToHandle(&error)) isolate->Throw(*error);

387 }	381 }

388	382

389	383

390 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,	384 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,

391 Handle<String> sample_subject,	385 Handle<String> sample_subject,

392 bool is_ascii) {	386 bool is_one_byte) {

393 // Compile the RegExp.	387 // Compile the RegExp.

394 Isolate* isolate = re->GetIsolate();	388 Isolate* isolate = re->GetIsolate();

395 Zone zone(isolate);	389 Zone zone(isolate);

396 PostponeInterruptsScope postpone(isolate);	390 PostponeInterruptsScope postpone(isolate);

397 // If we had a compilation error the last time this is saved at the	391 // If we had a compilation error the last time this is saved at the

398 // saved code index.	392 // saved code index.

399 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));	393 Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));

400 // When arriving here entry can only be a smi, either representing an	394 // When arriving here entry can only be a smi, either representing an

401 // uncompiled regexp, a previous compilation error, or code that has	395 // uncompiled regexp, a previous compilation error, or code that has

402 // been flushed.	396 // been flushed.

403 DCHECK(entry->IsSmi());	397 DCHECK(entry->IsSmi());

404 int entry_value = Smi::cast(entry)->value();	398 int entry_value = Smi::cast(entry)->value();

405 DCHECK(entry_value == JSRegExp::kUninitializedValue ||	399 DCHECK(entry_value == JSRegExp::kUninitializedValue ||

406 entry_value == JSRegExp::kCompilationErrorValue ||	400 entry_value == JSRegExp::kCompilationErrorValue ||

407 (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));	401 (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));

408	402

409 if (entry_value == JSRegExp::kCompilationErrorValue) {	403 if (entry_value == JSRegExp::kCompilationErrorValue) {

410 // A previous compilation failed and threw an error which we store in	404 // A previous compilation failed and threw an error which we store in

411 // the saved code index (we store the error message, not the actual	405 // the saved code index (we store the error message, not the actual

412 // error). Recreate the error object and throw it.	406 // error). Recreate the error object and throw it.

413 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));	407 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));

414 DCHECK(error_string->IsString());	408 DCHECK(error_string->IsString());

415 Handle<String> error_message(String::cast(error_string));	409 Handle<String> error_message(String::cast(error_string));

416 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);	410 CreateRegExpErrorObjectAndThrow(re, error_message, isolate);

417 return false;	411 return false;

418 }	412 }

419	413

420 JSRegExp::Flags flags = re->GetFlags();	414 JSRegExp::Flags flags = re->GetFlags();

421	415

422 Handle<String> pattern(re->Pattern());	416 Handle<String> pattern(re->Pattern());

423 pattern = String::Flatten(pattern);	417 pattern = String::Flatten(pattern);

424 RegExpCompileData compile_data;	418 RegExpCompileData compile_data;

425 FlatStringReader reader(isolate, pattern);	419 FlatStringReader reader(isolate, pattern);

426 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),	420 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),

427 &compile_data,	421 &compile_data,

428 &zone)) {	422 &zone)) {

429 // Throw an exception if we fail to parse the pattern.	423 // Throw an exception if we fail to parse the pattern.

430 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.	424 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.

431 USE(ThrowRegExpException(re,	425 USE(ThrowRegExpException(re,

432 pattern,	426 pattern,

433 compile_data.error,	427 compile_data.error,

434 "malformed_regexp"));	428 "malformed_regexp"));

435 return false;	429 return false;

436 }	430 }

437 RegExpEngine::CompilationResult result =	431 RegExpEngine::CompilationResult result = RegExpEngine::Compile(

438 RegExpEngine::Compile(&compile_data,	432 &compile_data, flags.is_ignore_case(), flags.is_global(),

439 flags.is_ignore_case(),	433 flags.is_multiline(), pattern, sample_subject, is_one_byte, &zone);

440 flags.is_global(),

441 flags.is_multiline(),

442 pattern,

443 sample_subject,

444 is_ascii,

445 &zone);

446 if (result.error_message != NULL) {	434 if (result.error_message != NULL) {

447 // Unable to compile regexp.	435 // Unable to compile regexp.

448 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(	436 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(

449 CStrVector(result.error_message)).ToHandleChecked();	437 CStrVector(result.error_message)).ToHandleChecked();

450 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);	438 CreateRegExpErrorObjectAndThrow(re, error_message, isolate);

451 return false;	439 return false;

452 }	440 }

453	441

454 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));	442 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));

455 data->set(JSRegExp::code_index(is_ascii), result.code);	443 data->set(JSRegExp::code_index(is_one_byte), result.code);

456 int register_max = IrregexpMaxRegisterCount(*data);	444 int register_max = IrregexpMaxRegisterCount(*data);

457 if (result.num_registers > register_max) {	445 if (result.num_registers > register_max) {

458 SetIrregexpMaxRegisterCount(*data, result.num_registers);	446 SetIrregexpMaxRegisterCount(*data, result.num_registers);

459 }	447 }

460	448

461 return true;	449 return true;

462 }	450 }

463	451

464	452

465 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {	453 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {

(...skipping 10 matching lines...) Expand all Loading...
476 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {	464 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {

477 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();	465 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();

478 }	466 }

479	467

480	468

481 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {	469 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {

482 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();	470 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();

483 }	471 }

484	472

485	473

486 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {	474 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) {

487 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));	475 return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));

488 }	476 }

489	477

490	478

491 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {	479 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) {

492 return Code::cast(re->get(JSRegExp::code_index(is_ascii)));	480 return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));

493 }	481 }

494	482

495	483

496 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,	484 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,

497 Handle<String> pattern,	485 Handle<String> pattern,

498 JSRegExp::Flags flags,	486 JSRegExp::Flags flags,

499 int capture_count) {	487 int capture_count) {

500 // Initialize compiled code entries to null.	488 // Initialize compiled code entries to null.

501 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,	489 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,

502 JSRegExp::IRREGEXP,	490 JSRegExp::IRREGEXP,

503 pattern,	491 pattern,

504 flags,	492 flags,

505 capture_count);	493 capture_count);

506 }	494 }

507	495

508	496

509 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,	497 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,

510 Handle<String> subject) {	498 Handle<String> subject) {

511 subject = String::Flatten(subject);	499 subject = String::Flatten(subject);

512	500

513 // Check the asciiness of the underlying storage.	501 // Check representation of the underlying storage.

514 bool is_ascii = subject->IsOneByteRepresentationUnderneath();	502 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();

515 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;	503 if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;

516	504

517 #ifdef V8_INTERPRETED_REGEXP	505 #ifdef V8_INTERPRETED_REGEXP

518 // Byte-code regexp needs space allocated for all its registers.	506 // Byte-code regexp needs space allocated for all its registers.

519 // The result captures are copied to the start of the registers array	507 // The result captures are copied to the start of the registers array

520 // if the match succeeds. This way those registers are not clobbered	508 // if the match succeeds. This way those registers are not clobbered

521 // when we set the last match info from last successful match.	509 // when we set the last match info from last successful match.

522 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +	510 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +

523 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;	511 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;

524 #else // V8_INTERPRETED_REGEXP	512 #else // V8_INTERPRETED_REGEXP

525 // Native regexp only needs room to output captures. Registers are handled	513 // Native regexp only needs room to output captures. Registers are handled

526 // internally.	514 // internally.

527 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;	515 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;

528 #endif // V8_INTERPRETED_REGEXP	516 #endif // V8_INTERPRETED_REGEXP

529 }	517 }

530	518

531	519

532 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,	520 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,

533 Handle<String> subject,	521 Handle<String> subject,

534 int index,	522 int index,

535 int32_t* output,	523 int32_t* output,

536 int output_size) {	524 int output_size) {

537 Isolate* isolate = regexp->GetIsolate();	525 Isolate* isolate = regexp->GetIsolate();

538	526

539 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);	527 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);

540	528

541 DCHECK(index >= 0);	529 DCHECK(index >= 0);

542 DCHECK(index <= subject->length());	530 DCHECK(index <= subject->length());

543 DCHECK(subject->IsFlat());	531 DCHECK(subject->IsFlat());

544	532

545 bool is_ascii = subject->IsOneByteRepresentationUnderneath();	533 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();

546	534

547 #ifndef V8_INTERPRETED_REGEXP	535 #ifndef V8_INTERPRETED_REGEXP

548 DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);	536 DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);

549 do {	537 do {

550 EnsureCompiledIrregexp(regexp, subject, is_ascii);	538 EnsureCompiledIrregexp(regexp, subject, is_one_byte);

551 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);	539 Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);

552 // The stack is used to allocate registers for the compiled regexp code.	540 // The stack is used to allocate registers for the compiled regexp code.

553 // This means that in case of failure, the output registers array is left	541 // This means that in case of failure, the output registers array is left

554 // untouched and contains the capture results from the previous successful	542 // untouched and contains the capture results from the previous successful

555 // match. We can use that to set the last match info lazily.	543 // match. We can use that to set the last match info lazily.

556 NativeRegExpMacroAssembler::Result res =	544 NativeRegExpMacroAssembler::Result res =

557 NativeRegExpMacroAssembler::Match(code,	545 NativeRegExpMacroAssembler::Match(code,

558 subject,	546 subject,

559 output,	547 output,

560 output_size,	548 output_size,

561 index,	549 index,

562 isolate);	550 isolate);

563 if (res != NativeRegExpMacroAssembler::RETRY) {	551 if (res != NativeRegExpMacroAssembler::RETRY) {

564 DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||	552 DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||

565 isolate->has_pending_exception());	553 isolate->has_pending_exception());

566 STATIC_ASSERT(	554 STATIC_ASSERT(

567 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);	555 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);

568 STATIC_ASSERT(	556 STATIC_ASSERT(

569 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);	557 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);

570 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)	558 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)

571 == RE_EXCEPTION);	559 == RE_EXCEPTION);

572 return static_cast<IrregexpResult>(res);	560 return static_cast<IrregexpResult>(res);

573 }	561 }

574 // If result is RETRY, the string has changed representation, and we	562 // If result is RETRY, the string has changed representation, and we

575 // must restart from scratch.	563 // must restart from scratch.

576 // In this case, it means we must make sure we are prepared to handle	564 // In this case, it means we must make sure we are prepared to handle

577 // the, potentially, different subject (the string can switch between	565 // the, potentially, different subject (the string can switch between

578 // being internal and external, and even between being ASCII and UC16,	566 // being internal and external, and even between being Latin1 and UC16,

579 // but the characters are always the same).	567 // but the characters are always the same).

580 IrregexpPrepare(regexp, subject);	568 IrregexpPrepare(regexp, subject);

581 is_ascii = subject->IsOneByteRepresentationUnderneath();	569 is_one_byte = subject->IsOneByteRepresentationUnderneath();

582 } while (true);	570 } while (true);

583 UNREACHABLE();	571 UNREACHABLE();

584 return RE_EXCEPTION;	572 return RE_EXCEPTION;

585 #else // V8_INTERPRETED_REGEXP	573 #else // V8_INTERPRETED_REGEXP

586	574

587 DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));	575 DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));

588 // We must have done EnsureCompiledIrregexp, so we can get the number of	576 // We must have done EnsureCompiledIrregexp, so we can get the number of

589 // registers.	577 // registers.

590 int number_of_capture_registers =	578 int number_of_capture_registers =

591 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;	579 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;

592 int32_t* raw_output = &output[number_of_capture_registers];	580 int32_t* raw_output = &output[number_of_capture_registers];

593 // We do not touch the actual capture result registers until we know there	581 // We do not touch the actual capture result registers until we know there

594 // has been a match so that we can use those capture results to set the	582 // has been a match so that we can use those capture results to set the

595 // last match info.	583 // last match info.

596 for (int i = number_of_capture_registers - 1; i >= 0; i--) {	584 for (int i = number_of_capture_registers - 1; i >= 0; i--) {

597 raw_output[i] = -1;	585 raw_output[i] = -1;

598 }	586 }

599 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);	587 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),

	588 isolate);

600	589

601 IrregexpResult result = IrregexpInterpreter::Match(isolate,	590 IrregexpResult result = IrregexpInterpreter::Match(isolate,

602 byte_codes,	591 byte_codes,

603 subject,	592 subject,

604 raw_output,	593 raw_output,

605 index);	594 index);

606 if (result == RE_SUCCESS) {	595 if (result == RE_SUCCESS) {

607 // Copy capture results to the start of the registers array.	596 // Copy capture results to the start of the registers array.

608 MemCopy(output, raw_output, number_of_capture_registers * sizeof(int32_t));	597 MemCopy(output, raw_output, number_of_capture_registers * sizeof(int32_t));

609 }	598 }

(...skipping 380 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
990	979

991	980

992 private:	981 private:

993 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];	982 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];

994 int total_samples_;	983 int total_samples_;

995 };	984 };

996	985

997	986

998 class RegExpCompiler {	987 class RegExpCompiler {

999 public:	988 public:

1000 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,	989 RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte,

1001 Zone* zone);	990 Zone* zone);

1002	991

1003 int AllocateRegister() {	992 int AllocateRegister() {

1004 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {	993 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {

1005 reg_exp_too_big_ = true;	994 reg_exp_too_big_ = true;

1006 return next_register_;	995 return next_register_;

1007 }	996 }

1008 return next_register_++;	997 return next_register_++;

1009 }	998 }

1010	999

(...skipping 12 matching lines...) Expand all Loading...
1023 EndNode* accept() { return accept_; }	1012 EndNode* accept() { return accept_; }

1024	1013

1025 static const int kMaxRecursion = 100;	1014 static const int kMaxRecursion = 100;

1026 inline int recursion_depth() { return recursion_depth_; }	1015 inline int recursion_depth() { return recursion_depth_; }

1027 inline void IncrementRecursionDepth() { recursion_depth_++; }	1016 inline void IncrementRecursionDepth() { recursion_depth_++; }

1028 inline void DecrementRecursionDepth() { recursion_depth_--; }	1017 inline void DecrementRecursionDepth() { recursion_depth_--; }

1029	1018

1030 void SetRegExpTooBig() { reg_exp_too_big_ = true; }	1019 void SetRegExpTooBig() { reg_exp_too_big_ = true; }

1031	1020

1032 inline bool ignore_case() { return ignore_case_; }	1021 inline bool ignore_case() { return ignore_case_; }

1033 inline bool ascii() { return ascii_; }	1022 inline bool one_byte() { return one_byte_; }

1034 FrequencyCollator* frequency_collator() { return &frequency_collator_; }	1023 FrequencyCollator* frequency_collator() { return &frequency_collator_; }

1035	1024

1036 int current_expansion_factor() { return current_expansion_factor_; }	1025 int current_expansion_factor() { return current_expansion_factor_; }

1037 void set_current_expansion_factor(int value) {	1026 void set_current_expansion_factor(int value) {

1038 current_expansion_factor_ = value;	1027 current_expansion_factor_ = value;

1039 }	1028 }

1040	1029

1041 Zone* zone() const { return zone_; }	1030 Zone* zone() const { return zone_; }

1042	1031

1043 static const int kNoRegister = -1;	1032 static const int kNoRegister = -1;

1044	1033

1045 private:	1034 private:

1046 EndNode* accept_;	1035 EndNode* accept_;

1047 int next_register_;	1036 int next_register_;

1048 List<RegExpNode*>* work_list_;	1037 List<RegExpNode*>* work_list_;

1049 int recursion_depth_;	1038 int recursion_depth_;

1050 RegExpMacroAssembler* macro_assembler_;	1039 RegExpMacroAssembler* macro_assembler_;

1051 bool ignore_case_;	1040 bool ignore_case_;

1052 bool ascii_;	1041 bool one_byte_;

1053 bool reg_exp_too_big_;	1042 bool reg_exp_too_big_;

1054 int current_expansion_factor_;	1043 int current_expansion_factor_;

1055 FrequencyCollator frequency_collator_;	1044 FrequencyCollator frequency_collator_;

1056 Zone* zone_;	1045 Zone* zone_;

1057 };	1046 };

1058	1047

1059	1048

1060 class RecursionCheck {	1049 class RecursionCheck {

1061 public:	1050 public:

1062 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {	1051 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {

1063 compiler->IncrementRecursionDepth();	1052 compiler->IncrementRecursionDepth();

1064 }	1053 }

1065 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }	1054 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }

1066 private:	1055 private:

1067 RegExpCompiler* compiler_;	1056 RegExpCompiler* compiler_;

1068 };	1057 };

1069	1058

1070	1059

1071 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {	1060 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {

1072 return RegExpEngine::CompilationResult(isolate, "RegExp too big");	1061 return RegExpEngine::CompilationResult(isolate, "RegExp too big");

1073 }	1062 }

1074	1063

1075	1064

1076 // Attempts to compile the regexp using an Irregexp code generator. Returns	1065 // Attempts to compile the regexp using an Irregexp code generator. Returns

1077 // a fixed array or a null handle depending on whether it succeeded.	1066 // a fixed array or a null handle depending on whether it succeeded.

1078 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,	1067 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,

1079 Zone* zone)	1068 bool one_byte, Zone* zone)

1080 : next_register_(2 * (capture_count + 1)),	1069 : next_register_(2 * (capture_count + 1)),

1081 work_list_(NULL),	1070 work_list_(NULL),

1082 recursion_depth_(0),	1071 recursion_depth_(0),

1083 ignore_case_(ignore_case),	1072 ignore_case_(ignore_case),

1084 ascii_(ascii),	1073 one_byte_(one_byte),

1085 reg_exp_too_big_(false),	1074 reg_exp_too_big_(false),

1086 current_expansion_factor_(1),	1075 current_expansion_factor_(1),

1087 frequency_collator_(),	1076 frequency_collator_(),

1088 zone_(zone) {	1077 zone_(zone) {

1089 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);	1078 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);

1090 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);	1079 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);

1091 }	1080 }

1092	1081

1093	1082

1094 RegExpEngine::CompilationResult RegExpCompiler::Assemble(	1083 RegExpEngine::CompilationResult RegExpCompiler::Assemble(

(...skipping 490 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1585 macro_assembler->IfRegisterLT(guard->reg(),	1574 macro_assembler->IfRegisterLT(guard->reg(),

1586 guard->value(),	1575 guard->value(),

1587 trace->backtrack());	1576 trace->backtrack());

1588 break;	1577 break;

1589 }	1578 }

1590 }	1579 }

1591	1580

1592	1581

1593 // Returns the number of characters in the equivalence class, omitting those	1582 // Returns the number of characters in the equivalence class, omitting those

1594 // that cannot occur in the source string because it is ASCII.	1583 // that cannot occur in the source string because it is ASCII.

1595 static int GetCaseIndependentLetters(Isolate* isolate,	1584 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,

1596 uc16 character,	1585 bool one_byte_subject,

1597 bool ascii_subject,

1598 unibrow::uchar* letters) {	1586 unibrow::uchar* letters) {

1599 int length =	1587 int length =

1600 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);	1588 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);

1601 // Unibrow returns 0 or 1 for characters where case independence is	1589 // Unibrow returns 0 or 1 for characters where case independence is

1602 // trivial.	1590 // trivial.

1603 if (length == 0) {	1591 if (length == 0) {

1604 letters[0] = character;	1592 letters[0] = character;

1605 length = 1;	1593 length = 1;

1606 }	1594 }

1607 if (!ascii_subject || character <= String::kMaxOneByteCharCode) {	1595 if (!one_byte_subject || character <= String::kMaxOneByteCharCode) {

1608 return length;	1596 return length;

1609 }	1597 }

	1598

1610 // The standard requires that non-ASCII characters cannot have ASCII	1599 // The standard requires that non-ASCII characters cannot have ASCII

1611 // character codes in their equivalence class.	1600 // character codes in their equivalence class.

	1601 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,

	1602 // is it? For example, \u00C5 is equivalent to \u212B.
	Yang 2014/09/10 08:26:36 This is one of the TODOs I mentioned. dcarney 2014/09/10 09:35:12 I checked other browsers I think originally, and we were matching behaviour. Will check it out when I'm back
1612 return 0;	1603 return 0;

1613 }	1604 }

1614	1605

1615	1606

1616 static inline bool EmitSimpleCharacter(Isolate* isolate,	1607 static inline bool EmitSimpleCharacter(Isolate* isolate,

1617 RegExpCompiler* compiler,	1608 RegExpCompiler* compiler,

1618 uc16 c,	1609 uc16 c,

1619 Label* on_failure,	1610 Label* on_failure,

1620 int cp_offset,	1611 int cp_offset,

1621 bool check,	1612 bool check,

(...skipping 15 matching lines...) Expand all Loading...
1637 // Only emits non-letters (things that don't have case). Only used for case	1628 // Only emits non-letters (things that don't have case). Only used for case

1638 // independent matches.	1629 // independent matches.

1639 static inline bool EmitAtomNonLetter(Isolate* isolate,	1630 static inline bool EmitAtomNonLetter(Isolate* isolate,

1640 RegExpCompiler* compiler,	1631 RegExpCompiler* compiler,

1641 uc16 c,	1632 uc16 c,

1642 Label* on_failure,	1633 Label* on_failure,

1643 int cp_offset,	1634 int cp_offset,

1644 bool check,	1635 bool check,

1645 bool preloaded) {	1636 bool preloaded) {

1646 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();	1637 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1647 bool ascii = compiler->ascii();	1638 bool one_byte = compiler->one_byte();

1648 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1639 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1649 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);	1640 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);

1650 if (length < 1) {	1641 if (length < 1) {

1651 // This can't match. Must be an ASCII subject and a non-ASCII character.	1642 // This can't match. Must be an one-byte subject and a non-one-byte

1652 // We do not need to do anything since the ASCII pass already handled this.	1643 // character. We do not need to do anything since the one-byte pass

	1644 // already handled this.

1653 return false; // Bounds not checked.	1645 return false; // Bounds not checked.

1654 }	1646 }

1655 bool checked = false;	1647 bool checked = false;

1656 // We handle the length > 1 case in a later pass.	1648 // We handle the length > 1 case in a later pass.

1657 if (length == 1) {	1649 if (length == 1) {

1658 if (ascii && c > String::kMaxOneByteCharCodeU) {	1650 if (one_byte && c > String::kMaxOneByteCharCodeU) {

1659 // Can't match - see above.	1651 // Can't match - see above.

1660 return false; // Bounds not checked.	1652 return false; // Bounds not checked.

1661 }	1653 }

1662 if (!preloaded) {	1654 if (!preloaded) {

1663 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);	1655 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1664 checked = check;	1656 checked = check;

1665 }	1657 }

1666 macro_assembler->CheckNotCharacter(c, on_failure);	1658 macro_assembler->CheckNotCharacter(c, on_failure);

1667 }	1659 }

1668 return checked;	1660 return checked;

1669 }	1661 }

1670	1662

1671	1663

1672 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,	1664 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

1673 bool ascii,	1665 bool one_byte, uc16 c1, uc16 c2,

1674 uc16 c1,

1675 uc16 c2,

1676 Label* on_failure) {	1666 Label* on_failure) {

1677 uc16 char_mask;	1667 uc16 char_mask;

1678 if (ascii) {	1668 if (one_byte) {

1679 char_mask = String::kMaxOneByteCharCode;	1669 char_mask = String::kMaxOneByteCharCode;

1680 } else {	1670 } else {

1681 char_mask = String::kMaxUtf16CodeUnit;	1671 char_mask = String::kMaxUtf16CodeUnit;

1682 }	1672 }

1683 uc16 exor = c1 ^ c2;	1673 uc16 exor = c1 ^ c2;

1684 // Check whether exor has only one bit set.	1674 // Check whether exor has only one bit set.

1685 if (((exor - 1) & exor) == 0) {	1675 if (((exor - 1) & exor) == 0) {

1686 // If c1 and c2 differ only by one bit.	1676 // If c1 and c2 differ only by one bit.

1687 // Ecma262UnCanonicalize always gives the highest number last.	1677 // Ecma262UnCanonicalize always gives the highest number last.

1688 DCHECK(c2 > c1);	1678 DCHECK(c2 > c1);

(...skipping 30 matching lines...) Expand all Loading...
1719 // Only emits letters (things that have case). Only used for case independent	1709 // Only emits letters (things that have case). Only used for case independent

1720 // matches.	1710 // matches.

1721 static inline bool EmitAtomLetter(Isolate* isolate,	1711 static inline bool EmitAtomLetter(Isolate* isolate,

1722 RegExpCompiler* compiler,	1712 RegExpCompiler* compiler,

1723 uc16 c,	1713 uc16 c,

1724 Label* on_failure,	1714 Label* on_failure,

1725 int cp_offset,	1715 int cp_offset,

1726 bool check,	1716 bool check,

1727 bool preloaded) {	1717 bool preloaded) {

1728 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();	1718 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1729 bool ascii = compiler->ascii();	1719 bool one_byte = compiler->one_byte();

1730 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1720 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1731 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);	1721 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);

1732 if (length <= 1) return false;	1722 if (length <= 1) return false;

1733 // We may not need to check against the end of the input string	1723 // We may not need to check against the end of the input string

1734 // if this character lies before a character that matched.	1724 // if this character lies before a character that matched.

1735 if (!preloaded) {	1725 if (!preloaded) {

1736 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);	1726 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1737 }	1727 }

1738 Label ok;	1728 Label ok;

1739 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);	1729 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

1740 switch (length) {	1730 switch (length) {

1741 case 2: {	1731 case 2: {

1742 if (ShortCutEmitCharacterPair(macro_assembler,	1732 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],

1743 ascii,	1733 chars[1], on_failure)) {

1744 chars[0],

1745 chars[1],

1746 on_failure)) {

1747 } else {	1734 } else {

1748 macro_assembler->CheckCharacter(chars[0], &ok);	1735 macro_assembler->CheckCharacter(chars[0], &ok);

1749 macro_assembler->CheckNotCharacter(chars[1], on_failure);	1736 macro_assembler->CheckNotCharacter(chars[1], on_failure);

1750 macro_assembler->Bind(&ok);	1737 macro_assembler->Bind(&ok);

1751 }	1738 }

1752 break;	1739 break;

1753 }	1740 }

1754 case 4:	1741 case 4:

1755 macro_assembler->CheckCharacter(chars[3], &ok);	1742 macro_assembler->CheckCharacter(chars[3], &ok);

1756 // Fall through!	1743 // Fall through!

(...skipping 154 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1911	1898

1912 *new_start_index = start_index;	1899 *new_start_index = start_index;

1913 *border = (ranges->at(start_index) & ~kMask) + kSize;	1900 *border = (ranges->at(start_index) & ~kMask) + kSize;

1914 while (*new_start_index < end_index) {	1901 while (*new_start_index < end_index) {

1915 if (ranges->at(*new_start_index) > *border) break;	1902 if (ranges->at(*new_start_index) > *border) break;

1916 (*new_start_index)++;	1903 (*new_start_index)++;

1917 }	1904 }

1918 // new_start_index is the index of the first edge that is beyond the	1905 // new_start_index is the index of the first edge that is beyond the

1919 // current kSize space.	1906 // current kSize space.

1920	1907

1921 // For very large search spaces we do a binary chop search of the non-ASCII	1908 // For very large search spaces we do a binary chop search of the non-Latin1

1922 // space instead of just going to the end of the current kSize space. The	1909 // space instead of just going to the end of the current kSize space. The

1923 // heuristics are complicated a little by the fact that any 128-character	1910 // heuristics are complicated a little by the fact that any 128-character

1924 // encoding space can be quickly tested with a table lookup, so we don't	1911 // encoding space can be quickly tested with a table lookup, so we don't

1925 // wish to do binary chop search at a smaller granularity than that. A	1912 // wish to do binary chop search at a smaller granularity than that. A

1926 // 128-character space can take up a lot of space in the ranges array if,	1913 // 128-character space can take up a lot of space in the ranges array if,

1927 // for example, we only want to match every second character (eg. the lower	1914 // for example, we only want to match every second character (eg. the lower

1928 // case characters on some Unicode pages).	1915 // case characters on some Unicode pages).

1929 int binary_chop_index = (end_index + start_index) / 2;	1916 int binary_chop_index = (end_index + start_index) / 2;

1930 // The first test ensures that we get to the code that handles the ASCII	1917 // The first test ensures that we get to the code that handles the Latin1

1931 // range with a single not-taken branch, speeding up this important	1918 // range with a single not-taken branch, speeding up this important

1932 // character range (even non-ASCII charset-based text has spaces and	1919 // character range (even non-Latin1 charset-based text has spaces and

1933 // punctuation).	1920 // punctuation).

1934 if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case.	1921 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.

1935 end_index - start_index > (*new_start_index - start_index) * 2 &&	1922 end_index - start_index > (*new_start_index - start_index) * 2 &&

1936 last - first > kSize * 2 &&	1923 last - first > kSize * 2 && binary_chop_index > *new_start_index &&

1937 binary_chop_index > *new_start_index &&

1938 ranges->at(binary_chop_index) >= first + 2 * kSize) {	1924 ranges->at(binary_chop_index) >= first + 2 * kSize) {

1939 int scan_forward_for_section_border = binary_chop_index;;	1925 int scan_forward_for_section_border = binary_chop_index;;

1940 int new_border = (ranges->at(binary_chop_index) \| kMask) + 1;	1926 int new_border = (ranges->at(binary_chop_index) \| kMask) + 1;

1941	1927

1942 while (scan_forward_for_section_border < end_index) {	1928 while (scan_forward_for_section_border < end_index) {

1943 if (ranges->at(scan_forward_for_section_border) > new_border) {	1929 if (ranges->at(scan_forward_for_section_border) > new_border) {

1944 *new_start_index = scan_forward_for_section_border;	1930 *new_start_index = scan_forward_for_section_border;

1945 *border = new_border;	1931 *border = new_border;

1946 break;	1932 break;

1947 }	1933 }

(...skipping 166 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2114 border,	2100 border,

2115 max_char,	2101 max_char,

2116 &dummy,	2102 &dummy,

2117 flip ? odd_label : even_label,	2103 flip ? odd_label : even_label,

2118 flip ? even_label : odd_label);	2104 flip ? even_label : odd_label);

2119 }	2105 }

2120 }	2106 }

2121	2107

2122	2108

2123 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,	2109 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

2124 RegExpCharacterClass* cc,	2110 RegExpCharacterClass* cc, bool one_byte,

2125 bool ascii,	2111 Label* on_failure, int cp_offset, bool check_offset,

2126 Label* on_failure,	2112 bool preloaded, Zone* zone) {

2127 int cp_offset,

2128 bool check_offset,

2129 bool preloaded,

2130 Zone* zone) {

2131 ZoneList<CharacterRange>* ranges = cc->ranges(zone);	2113 ZoneList<CharacterRange>* ranges = cc->ranges(zone);

2132 if (!CharacterRange::IsCanonical(ranges)) {	2114 if (!CharacterRange::IsCanonical(ranges)) {

2133 CharacterRange::Canonicalize(ranges);	2115 CharacterRange::Canonicalize(ranges);

2134 }	2116 }

2135	2117

2136 int max_char;	2118 int max_char;

2137 if (ascii) {	2119 if (one_byte) {

2138 max_char = String::kMaxOneByteCharCode;	2120 max_char = String::kMaxOneByteCharCode;

2139 } else {	2121 } else {

2140 max_char = String::kMaxUtf16CodeUnit;	2122 max_char = String::kMaxUtf16CodeUnit;

2141 }	2123 }

2142	2124

2143 int range_count = ranges->length();	2125 int range_count = ranges->length();

2144	2126

2145 int last_valid_range = range_count - 1;	2127 int last_valid_range = range_count - 1;

2146 while (last_valid_range >= 0) {	2128 while (last_valid_range >= 0) {

2147 CharacterRange& range = ranges->at(last_valid_range);	2129 CharacterRange& range = ranges->at(last_valid_range);

(...skipping 309 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2457 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,	2439 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

2458 Trace* trace,	2440 Trace* trace,

2459 bool preload_has_checked_bounds,	2441 bool preload_has_checked_bounds,

2460 Label* on_possible_success,	2442 Label* on_possible_success,

2461 QuickCheckDetails* details,	2443 QuickCheckDetails* details,

2462 bool fall_through_on_failure) {	2444 bool fall_through_on_failure) {

2463 if (details->characters() == 0) return false;	2445 if (details->characters() == 0) return false;

2464 GetQuickCheckDetails(	2446 GetQuickCheckDetails(

2465 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);	2447 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);

2466 if (details->cannot_match()) return false;	2448 if (details->cannot_match()) return false;

2467 if (!details->Rationalize(compiler->ascii())) return false;	2449 if (!details->Rationalize(compiler->one_byte())) return false;

2468 DCHECK(details->characters() == 1 \|\|	2450 DCHECK(details->characters() == 1 \|\|

2469 compiler->macro_assembler()->CanReadUnaligned());	2451 compiler->macro_assembler()->CanReadUnaligned());

2470 uint32_t mask = details->mask();	2452 uint32_t mask = details->mask();

2471 uint32_t value = details->value();	2453 uint32_t value = details->value();

2472	2454

2473 RegExpMacroAssembler* assembler = compiler->macro_assembler();	2455 RegExpMacroAssembler* assembler = compiler->macro_assembler();

2474	2456

2475 if (trace->characters_preloaded() != details->characters()) {	2457 if (trace->characters_preloaded() != details->characters()) {

2476 assembler->LoadCurrentCharacter(trace->cp_offset(),	2458 assembler->LoadCurrentCharacter(trace->cp_offset(),

2477 trace->backtrack(),	2459 trace->backtrack(),

2478 !preload_has_checked_bounds,	2460 !preload_has_checked_bounds,

2479 details->characters());	2461 details->characters());

2480 }	2462 }

2481	2463

2482	2464

2483 bool need_mask = true;	2465 bool need_mask = true;

2484	2466

2485 if (details->characters() == 1) {	2467 if (details->characters() == 1) {

2486 // If number of characters preloaded is 1 then we used a byte or 16 bit	2468 // If number of characters preloaded is 1 then we used a byte or 16 bit

2487 // load so the value is already masked down.	2469 // load so the value is already masked down.

2488 uint32_t char_mask;	2470 uint32_t char_mask;

2489 if (compiler->ascii()) {	2471 if (compiler->one_byte()) {

2490 char_mask = String::kMaxOneByteCharCode;	2472 char_mask = String::kMaxOneByteCharCode;

2491 } else {	2473 } else {

2492 char_mask = String::kMaxUtf16CodeUnit;	2474 char_mask = String::kMaxUtf16CodeUnit;

2493 }	2475 }

2494 if ((mask & char_mask) == char_mask) need_mask = false;	2476 if ((mask & char_mask) == char_mask) need_mask = false;

2495 mask &= char_mask;	2477 mask &= char_mask;

2496 } else {	2478 } else {

2497 // For 2-character preloads in ASCII mode or 1-character preloads in	2479 // For 2-character preloads in one-byte mode or 1-character preloads in

2498 // TWO_BYTE mode we also use a 16 bit load with zero extend.	2480 // two-byte mode we also use a 16 bit load with zero extend.

2499 if (details->characters() == 2 && compiler->ascii()) {	2481 if (details->characters() == 2 && compiler->one_byte()) {

2500 if ((mask & 0xffff) == 0xffff) need_mask = false;	2482 if ((mask & 0xffff) == 0xffff) need_mask = false;

2501 } else if (details->characters() == 1 && !compiler->ascii()) {	2483 } else if (details->characters() == 1 && !compiler->one_byte()) {

2502 if ((mask & 0xffff) == 0xffff) need_mask = false;	2484 if ((mask & 0xffff) == 0xffff) need_mask = false;

2503 } else {	2485 } else {

2504 if (mask == 0xffffffff) need_mask = false;	2486 if (mask == 0xffffffff) need_mask = false;

2505 }	2487 }

2506 }	2488 }

2507	2489

2508 if (fall_through_on_failure) {	2490 if (fall_through_on_failure) {

2509 if (need_mask) {	2491 if (need_mask) {

2510 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);	2492 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

2511 } else {	2493 } else {

(...skipping 19 matching lines...) Expand all Loading...
2531 // machine word for the current character width in order to be used in	2513 // machine word for the current character width in order to be used in

2532 // generating a quick check.	2514 // generating a quick check.

2533 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,	2515 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

2534 RegExpCompiler* compiler,	2516 RegExpCompiler* compiler,

2535 int characters_filled_in,	2517 int characters_filled_in,

2536 bool not_at_start) {	2518 bool not_at_start) {

2537 Isolate* isolate = compiler->macro_assembler()->zone()->isolate();	2519 Isolate* isolate = compiler->macro_assembler()->zone()->isolate();

2538 DCHECK(characters_filled_in < details->characters());	2520 DCHECK(characters_filled_in < details->characters());

2539 int characters = details->characters();	2521 int characters = details->characters();

2540 int char_mask;	2522 int char_mask;

2541 if (compiler->ascii()) {	2523 if (compiler->one_byte()) {

2542 char_mask = String::kMaxOneByteCharCode;	2524 char_mask = String::kMaxOneByteCharCode;

2543 } else {	2525 } else {

2544 char_mask = String::kMaxUtf16CodeUnit;	2526 char_mask = String::kMaxUtf16CodeUnit;

2545 }	2527 }

2546 for (int k = 0; k < elms_->length(); k++) {	2528 for (int k = 0; k < elms_->length(); k++) {

2547 TextElement elm = elms_->at(k);	2529 TextElement elm = elms_->at(k);

2548 if (elm.text_type() == TextElement::ATOM) {	2530 if (elm.text_type() == TextElement::ATOM) {

2549 Vector<const uc16> quarks = elm.atom()->data();	2531 Vector<const uc16> quarks = elm.atom()->data();

2550 for (int i = 0; i < characters && i < quarks.length(); i++) {	2532 for (int i = 0; i < characters && i < quarks.length(); i++) {

2551 QuickCheckDetails::Position* pos =	2533 QuickCheckDetails::Position* pos =

2552 details->positions(characters_filled_in);	2534 details->positions(characters_filled_in);

2553 uc16 c = quarks[i];	2535 uc16 c = quarks[i];

2554 if (c > char_mask) {	2536 if (c > char_mask) {

2555 // If we expect a non-ASCII character from an ASCII string,	2537 // If we expect a non-Latin1 character from an one-byte string,

2556 // there is no way we can match. Not even case independent	2538 // there is no way we can match. Not even case-independent

2557 // matching can turn an ASCII character into non-ASCII or	2539 // matching can turn an Latin1 character into non-Latin1 or

2558 // vice versa.	2540 // vice versa.

	2541 // TODO(dcarney): issue 3550. Verify that this works as expected.

	2542 // For example, \u0178 is uppercase of \u00ff (y-umlaut).
	Yang 2014/09/10 08:26:36 This is the other.
2559 details->set_cannot_match();	2543 details->set_cannot_match();

2560 pos->determines_perfectly = false;	2544 pos->determines_perfectly = false;

2561 return;	2545 return;

2562 }	2546 }

2563 if (compiler->ignore_case()) {	2547 if (compiler->ignore_case()) {

2564 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	2548 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

2565 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),	2549 int length = GetCaseIndependentLetters(isolate, c,

2566 chars);	2550 compiler->one_byte(), chars);

2567 DCHECK(length != 0); // Can only happen if c > char_mask (see above).	2551 DCHECK(length != 0); // Can only happen if c > char_mask (see above).

2568 if (length == 1) {	2552 if (length == 1) {

2569 // This letter has no case equivalents, so it's nice and simple	2553 // This letter has no case equivalents, so it's nice and simple

2570 // and the mask-compare will determine definitely whether we have	2554 // and the mask-compare will determine definitely whether we have

2571 // a match at this character position.	2555 // a match at this character position.

2572 pos->mask = char_mask;	2556 pos->mask = char_mask;

2573 pos->value = c;	2557 pos->value = c;

2574 pos->determines_perfectly = true;	2558 pos->determines_perfectly = true;

2575 } else {	2559 } else {

2576 uint32_t common_bits = char_mask;	2560 uint32_t common_bits = char_mask;

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2685 void QuickCheckDetails::Clear() {	2669 void QuickCheckDetails::Clear() {

2686 for (int i = 0; i < characters_; i++) {	2670 for (int i = 0; i < characters_; i++) {

2687 positions_[i].mask = 0;	2671 positions_[i].mask = 0;

2688 positions_[i].value = 0;	2672 positions_[i].value = 0;

2689 positions_[i].determines_perfectly = false;	2673 positions_[i].determines_perfectly = false;

2690 }	2674 }

2691 characters_ = 0;	2675 characters_ = 0;

2692 }	2676 }

2693	2677

2694	2678

2695 void QuickCheckDetails::Advance(int by, bool ascii) {	2679 void QuickCheckDetails::Advance(int by, bool one_byte) {

2696 DCHECK(by >= 0);	2680 DCHECK(by >= 0);

2697 if (by >= characters_) {	2681 if (by >= characters_) {

2698 Clear();	2682 Clear();

2699 return;	2683 return;

2700 }	2684 }

2701 for (int i = 0; i < characters_ - by; i++) {	2685 for (int i = 0; i < characters_ - by; i++) {

2702 positions_[i] = positions_[by + i];	2686 positions_[i] = positions_[by + i];

2703 }	2687 }

2704 for (int i = characters_ - by; i < characters_; i++) {	2688 for (int i = characters_ - by; i < characters_; i++) {

2705 positions_[i].mask = 0;	2689 positions_[i].mask = 0;

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2749 info->visited = true;	2733 info->visited = true;

2750 }	2734 }

2751 ~VisitMarker() {	2735 ~VisitMarker() {

2752 info_->visited = false;	2736 info_->visited = false;

2753 }	2737 }

2754 private:	2738 private:

2755 NodeInfo* info_;	2739 NodeInfo* info_;

2756 };	2740 };

2757	2741

2758	2742

2759 RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {	2743 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {

2760 if (info()->replacement_calculated) return replacement();	2744 if (info()->replacement_calculated) return replacement();

2761 if (depth < 0) return this;	2745 if (depth < 0) return this;

2762 DCHECK(!info()->visited);	2746 DCHECK(!info()->visited);

2763 VisitMarker marker(info());	2747 VisitMarker marker(info());

2764 return FilterSuccessor(depth - 1, ignore_case);	2748 return FilterSuccessor(depth - 1, ignore_case);

2765 }	2749 }

2766	2750

2767	2751

2768 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {	2752 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {

2769 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);	2753 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);

2770 if (next == NULL) return set_replacement(NULL);	2754 if (next == NULL) return set_replacement(NULL);

2771 on_success_ = next;	2755 on_success_ = next;

2772 return set_replacement(this);	2756 return set_replacement(this);

2773 }	2757 }

2774	2758

2775	2759

2776 // We need to check for the following characters: 0x39c 0x3bc 0x178.	2760 // We need to check for the following characters: 0x39c 0x3bc 0x178.

2777 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {	2761 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {

2778 // TODO(dcarney): this could be a lot more efficient.	2762 // TODO(dcarney): this could be a lot more efficient.

2779 return range.Contains(0x39c) \|\|	2763 return range.Contains(0x39c) \|\|

2780 range.Contains(0x3bc) \|\| range.Contains(0x178);	2764 range.Contains(0x3bc) \|\| range.Contains(0x178);

2781 }	2765 }

2782	2766

2783	2767

2784 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {	2768 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {

2785 for (int i = 0; i < ranges->length(); i++) {	2769 for (int i = 0; i < ranges->length(); i++) {

2786 // TODO(dcarney): this could be a lot more efficient.	2770 // TODO(dcarney): this could be a lot more efficient.

2787 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;	2771 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;

2788 }	2772 }

2789 return false;	2773 return false;

2790 }	2774 }

2791	2775

2792	2776

2793 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {	2777 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {

2794 if (info()->replacement_calculated) return replacement();	2778 if (info()->replacement_calculated) return replacement();

2795 if (depth < 0) return this;	2779 if (depth < 0) return this;

2796 DCHECK(!info()->visited);	2780 DCHECK(!info()->visited);

2797 VisitMarker marker(info());	2781 VisitMarker marker(info());

2798 int element_count = elms_->length();	2782 int element_count = elms_->length();

2799 for (int i = 0; i < element_count; i++) {	2783 for (int i = 0; i < element_count; i++) {

2800 TextElement elm = elms_->at(i);	2784 TextElement elm = elms_->at(i);

2801 if (elm.text_type() == TextElement::ATOM) {	2785 if (elm.text_type() == TextElement::ATOM) {

2802 Vector<const uc16> quarks = elm.atom()->data();	2786 Vector<const uc16> quarks = elm.atom()->data();

2803 for (int j = 0; j < quarks.length(); j++) {	2787 for (int j = 0; j < quarks.length(); j++) {

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2837 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;	2821 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

2838 return set_replacement(NULL);	2822 return set_replacement(NULL);

2839 }	2823 }

2840 }	2824 }

2841 }	2825 }

2842 }	2826 }

2843 return FilterSuccessor(depth - 1, ignore_case);	2827 return FilterSuccessor(depth - 1, ignore_case);

2844 }	2828 }

2845	2829

2846	2830

2847 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {	2831 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {

2848 if (info()->replacement_calculated) return replacement();	2832 if (info()->replacement_calculated) return replacement();

2849 if (depth < 0) return this;	2833 if (depth < 0) return this;

2850 if (info()->visited) return this;	2834 if (info()->visited) return this;

2851 {	2835 {

2852 VisitMarker marker(info());	2836 VisitMarker marker(info());

2853	2837

2854 RegExpNode* continue_replacement =	2838 RegExpNode* continue_replacement =

2855 continue_node_->FilterASCII(depth - 1, ignore_case);	2839 continue_node_->FilterOneByte(depth - 1, ignore_case);

2856 // If we can't continue after the loop then there is no sense in doing the	2840 // If we can't continue after the loop then there is no sense in doing the

2857 // loop.	2841 // loop.

2858 if (continue_replacement == NULL) return set_replacement(NULL);	2842 if (continue_replacement == NULL) return set_replacement(NULL);

2859 }	2843 }

2860	2844

2861 return ChoiceNode::FilterASCII(depth - 1, ignore_case);	2845 return ChoiceNode::FilterOneByte(depth - 1, ignore_case);

2862 }	2846 }

2863	2847

2864	2848

2865 RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {	2849 RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {

2866 if (info()->replacement_calculated) return replacement();	2850 if (info()->replacement_calculated) return replacement();

2867 if (depth < 0) return this;	2851 if (depth < 0) return this;

2868 if (info()->visited) return this;	2852 if (info()->visited) return this;

2869 VisitMarker marker(info());	2853 VisitMarker marker(info());

2870 int choice_count = alternatives_->length();	2854 int choice_count = alternatives_->length();

2871	2855

2872 for (int i = 0; i < choice_count; i++) {	2856 for (int i = 0; i < choice_count; i++) {

2873 GuardedAlternative alternative = alternatives_->at(i);	2857 GuardedAlternative alternative = alternatives_->at(i);

2874 if (alternative.guards() != NULL && alternative.guards()->length() != 0) {	2858 if (alternative.guards() != NULL && alternative.guards()->length() != 0) {

2875 set_replacement(this);	2859 set_replacement(this);

2876 return this;	2860 return this;

2877 }	2861 }

2878 }	2862 }

2879	2863

2880 int surviving = 0;	2864 int surviving = 0;

2881 RegExpNode* survivor = NULL;	2865 RegExpNode* survivor = NULL;

2882 for (int i = 0; i < choice_count; i++) {	2866 for (int i = 0; i < choice_count; i++) {

2883 GuardedAlternative alternative = alternatives_->at(i);	2867 GuardedAlternative alternative = alternatives_->at(i);

2884 RegExpNode* replacement =	2868 RegExpNode* replacement =

2885 alternative.node()->FilterASCII(depth - 1, ignore_case);	2869 alternative.node()->FilterOneByte(depth - 1, ignore_case);

2886 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.	2870 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.

2887 if (replacement != NULL) {	2871 if (replacement != NULL) {

2888 alternatives_->at(i).set_node(replacement);	2872 alternatives_->at(i).set_node(replacement);

2889 surviving++;	2873 surviving++;

2890 survivor = replacement;	2874 survivor = replacement;

2891 }	2875 }

2892 }	2876 }

2893 if (surviving < 2) return set_replacement(survivor);	2877 if (surviving < 2) return set_replacement(survivor);

2894	2878

2895 set_replacement(this);	2879 set_replacement(this);

2896 if (surviving == choice_count) {	2880 if (surviving == choice_count) {

2897 return this;	2881 return this;

2898 }	2882 }

2899 // Only some of the nodes survived the filtering. We need to rebuild the	2883 // Only some of the nodes survived the filtering. We need to rebuild the

2900 // alternatives list.	2884 // alternatives list.

2901 ZoneList<GuardedAlternative>* new_alternatives =	2885 ZoneList<GuardedAlternative>* new_alternatives =

2902 new(zone()) ZoneList<GuardedAlternative>(surviving, zone());	2886 new(zone()) ZoneList<GuardedAlternative>(surviving, zone());

2903 for (int i = 0; i < choice_count; i++) {	2887 for (int i = 0; i < choice_count; i++) {

2904 RegExpNode* replacement =	2888 RegExpNode* replacement =

2905 alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case);	2889 alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);

2906 if (replacement != NULL) {	2890 if (replacement != NULL) {

2907 alternatives_->at(i).set_node(replacement);	2891 alternatives_->at(i).set_node(replacement);

2908 new_alternatives->Add(alternatives_->at(i), zone());	2892 new_alternatives->Add(alternatives_->at(i), zone());

2909 }	2893 }

2910 }	2894 }

2911 alternatives_ = new_alternatives;	2895 alternatives_ = new_alternatives;

2912 return this;	2896 return this;

2913 }	2897 }

2914	2898

2915	2899

2916 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,	2900 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,

2917 bool ignore_case) {	2901 bool ignore_case) {

2918 if (info()->replacement_calculated) return replacement();	2902 if (info()->replacement_calculated) return replacement();

2919 if (depth < 0) return this;	2903 if (depth < 0) return this;

2920 if (info()->visited) return this;	2904 if (info()->visited) return this;

2921 VisitMarker marker(info());	2905 VisitMarker marker(info());

2922 // Alternative 0 is the negative lookahead, alternative 1 is what comes	2906 // Alternative 0 is the negative lookahead, alternative 1 is what comes

2923 // afterwards.	2907 // afterwards.

2924 RegExpNode* node = alternatives_->at(1).node();	2908 RegExpNode* node = alternatives_->at(1).node();

2925 RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);	2909 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);

2926 if (replacement == NULL) return set_replacement(NULL);	2910 if (replacement == NULL) return set_replacement(NULL);

2927 alternatives_->at(1).set_node(replacement);	2911 alternatives_->at(1).set_node(replacement);

2928	2912

2929 RegExpNode* neg_node = alternatives_->at(0).node();	2913 RegExpNode* neg_node = alternatives_->at(0).node();

2930 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);	2914 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);

2931 // If the negative lookahead is always going to fail then	2915 // If the negative lookahead is always going to fail then

2932 // we don't need to check it.	2916 // we don't need to check it.

2933 if (neg_replacement == NULL) return set_replacement(replacement);	2917 if (neg_replacement == NULL) return set_replacement(replacement);

2934 alternatives_->at(0).set_node(neg_replacement);	2918 alternatives_->at(0).set_node(neg_replacement);

2935 return set_replacement(this);	2919 return set_replacement(this);

2936 }	2920 }

2937	2921

2938	2922

2939 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,	2923 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,

2940 RegExpCompiler* compiler,	2924 RegExpCompiler* compiler,

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3029 assembler->CheckAtStart(&ok);	3013 assembler->CheckAtStart(&ok);

3030 }	3014 }

3031 // We already checked that we are not at the start of input so it must be	3015 // We already checked that we are not at the start of input so it must be

3032 // OK to load the previous character.	3016 // OK to load the previous character.

3033 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,	3017 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,

3034 new_trace.backtrack(),	3018 new_trace.backtrack(),

3035 false);	3019 false);

3036 if (!assembler->CheckSpecialCharacterClass('n',	3020 if (!assembler->CheckSpecialCharacterClass('n',

3037 new_trace.backtrack())) {	3021 new_trace.backtrack())) {

3038 // Newline means \n, \r, 0x2028 or 0x2029.	3022 // Newline means \n, \r, 0x2028 or 0x2029.

3039 if (!compiler->ascii()) {	3023 if (!compiler->one_byte()) {

3040 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);	3024 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

3041 }	3025 }

3042 assembler->CheckCharacter('\n', &ok);	3026 assembler->CheckCharacter('\n', &ok);

3043 assembler->CheckNotCharacter('\r', new_trace.backtrack());	3027 assembler->CheckNotCharacter('\r', new_trace.backtrack());

3044 }	3028 }

3045 assembler->Bind(&ok);	3029 assembler->Bind(&ok);

3046 on_success->Emit(compiler, &new_trace);	3030 on_success->Emit(compiler, &new_trace);

3047 }	3031 }

3048	3032

3049	3033

(...skipping 177 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3227 // check can have involved a mask and compare operation which may simplify	3211 // check can have involved a mask and compare operation which may simplify

3228 // or obviate the need for further checks at some character positions.	3212 // or obviate the need for further checks at some character positions.

3229 void TextNode::TextEmitPass(RegExpCompiler* compiler,	3213 void TextNode::TextEmitPass(RegExpCompiler* compiler,

3230 TextEmitPassType pass,	3214 TextEmitPassType pass,

3231 bool preloaded,	3215 bool preloaded,

3232 Trace* trace,	3216 Trace* trace,

3233 bool first_element_checked,	3217 bool first_element_checked,

3234 int* checked_up_to) {	3218 int* checked_up_to) {

3235 RegExpMacroAssembler* assembler = compiler->macro_assembler();	3219 RegExpMacroAssembler* assembler = compiler->macro_assembler();

3236 Isolate* isolate = assembler->zone()->isolate();	3220 Isolate* isolate = assembler->zone()->isolate();

3237 bool ascii = compiler->ascii();	3221 bool one_byte = compiler->one_byte();

3238 Label* backtrack = trace->backtrack();	3222 Label* backtrack = trace->backtrack();

3239 QuickCheckDetails* quick_check = trace->quick_check_performed();	3223 QuickCheckDetails* quick_check = trace->quick_check_performed();

3240 int element_count = elms_->length();	3224 int element_count = elms_->length();

3241 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {	3225 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

3242 TextElement elm = elms_->at(i);	3226 TextElement elm = elms_->at(i);

3243 int cp_offset = trace->cp_offset() + elm.cp_offset();	3227 int cp_offset = trace->cp_offset() + elm.cp_offset();

3244 if (elm.text_type() == TextElement::ATOM) {	3228 if (elm.text_type() == TextElement::ATOM) {

3245 Vector<const uc16> quarks = elm.atom()->data();	3229 Vector<const uc16> quarks = elm.atom()->data();

3246 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {	3230 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {

3247 if (first_element_checked && i == 0 && j == 0) continue;	3231 if (first_element_checked && i == 0 && j == 0) continue;

3248 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;	3232 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;

3249 EmitCharacterFunction* emit_function = NULL;	3233 EmitCharacterFunction* emit_function = NULL;

3250 switch (pass) {	3234 switch (pass) {

3251 case NON_ASCII_MATCH:	3235 case NON_LATIN1_MATCH:

3252 DCHECK(ascii);	3236 DCHECK(one_byte);

3253 if (quarks[j] > String::kMaxOneByteCharCode) {	3237 if (quarks[j] > String::kMaxOneByteCharCode) {

3254 assembler->GoTo(backtrack);	3238 assembler->GoTo(backtrack);

3255 return;	3239 return;

3256 }	3240 }

3257 break;	3241 break;

3258 case NON_LETTER_CHARACTER_MATCH:	3242 case NON_LETTER_CHARACTER_MATCH:

3259 emit_function = &EmitAtomNonLetter;	3243 emit_function = &EmitAtomNonLetter;

3260 break;	3244 break;

3261 case SIMPLE_CHARACTER_MATCH:	3245 case SIMPLE_CHARACTER_MATCH:

3262 emit_function = &EmitSimpleCharacter;	3246 emit_function = &EmitSimpleCharacter;

(...skipping 14 matching lines...) Expand all Loading...
3277 preloaded);	3261 preloaded);

3278 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);	3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

3279 }	3263 }

3280 }	3264 }

3281 } else {	3265 } else {

3282 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());	3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());

3283 if (pass == CHARACTER_CLASS_MATCH) {	3267 if (pass == CHARACTER_CLASS_MATCH) {

3284 if (first_element_checked && i == 0) continue;	3268 if (first_element_checked && i == 0) continue;

3285 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;	3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;

3286 RegExpCharacterClass* cc = elm.char_class();	3270 RegExpCharacterClass* cc = elm.char_class();

3287 EmitCharClass(assembler,	3271 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,

3288 cc,	3272 *checked_up_to < cp_offset, preloaded, zone());

3289 ascii,

3290 backtrack,

3291 cp_offset,

3292 *checked_up_to < cp_offset,

3293 preloaded,

3294 zone());

3295 UpdateBoundsCheck(cp_offset, checked_up_to);	3273 UpdateBoundsCheck(cp_offset, checked_up_to);

3296 }	3274 }

3297 }	3275 }

3298 }	3276 }

3299 }	3277 }

3300	3278

3301	3279

3302 int TextNode::Length() {	3280 int TextNode::Length() {

3303 TextElement elm = elms_->last();	3281 TextElement elm = elms_->last();

3304 DCHECK(elm.cp_offset() >= 0);	3282 DCHECK(elm.cp_offset() >= 0);

(...skipping 20 matching lines...) Expand all Loading...
3325 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {	3303 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

3326 LimitResult limit_result = LimitVersions(compiler, trace);	3304 LimitResult limit_result = LimitVersions(compiler, trace);

3327 if (limit_result == DONE) return;	3305 if (limit_result == DONE) return;

3328 DCHECK(limit_result == CONTINUE);	3306 DCHECK(limit_result == CONTINUE);

3329	3307

3330 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {	3308 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {

3331 compiler->SetRegExpTooBig();	3309 compiler->SetRegExpTooBig();

3332 return;	3310 return;

3333 }	3311 }

3334	3312

3335 if (compiler->ascii()) {	3313 if (compiler->one_byte()) {

3336 int dummy = 0;	3314 int dummy = 0;

3337 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);	3315 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);

3338 }	3316 }

3339	3317

3340 bool first_elt_done = false;	3318 bool first_elt_done = false;

3341 int bound_checked_to = trace->cp_offset() - 1;	3319 int bound_checked_to = trace->cp_offset() - 1;

3342 bound_checked_to += trace->bound_checked_up_to();	3320 bound_checked_to += trace->bound_checked_up_to();

3343	3321

3344 // If a character is preloaded into the current character register then	3322 // If a character is preloaded into the current character register then

3345 // check that now.	3323 // check that now.

3346 if (trace->characters_preloaded() == 1) {	3324 if (trace->characters_preloaded() == 1) {

3347 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {	3325 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3383	3361

3384 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {	3362 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {

3385 DCHECK(by > 0);	3363 DCHECK(by > 0);

3386 // We don't have an instruction for shifting the current character register	3364 // We don't have an instruction for shifting the current character register

3387 // down or for using a shifted value for anything so lets just forget that	3365 // down or for using a shifted value for anything so lets just forget that

3388 // we preloaded any characters into it.	3366 // we preloaded any characters into it.

3389 characters_preloaded_ = 0;	3367 characters_preloaded_ = 0;

3390 // Adjust the offsets of the quick check performed information. This	3368 // Adjust the offsets of the quick check performed information. This

3391 // information is used to find out what we already determined about the	3369 // information is used to find out what we already determined about the

3392 // characters by means of mask and compare.	3370 // characters by means of mask and compare.

3393 quick_check_performed_.Advance(by, compiler->ascii());	3371 quick_check_performed_.Advance(by, compiler->one_byte());

3394 cp_offset_ += by;	3372 cp_offset_ += by;

3395 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {	3373 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

3396 compiler->SetRegExpTooBig();	3374 compiler->SetRegExpTooBig();

3397 cp_offset_ = 0;	3375 cp_offset_ = 0;

3398 }	3376 }

3399 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);	3377 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);

3400 }	3378 }

3401	3379

3402	3380

3403 void TextNode::MakeCaseIndependent(bool is_ascii) {	3381 void TextNode::MakeCaseIndependent(bool is_one_byte) {

3404 int element_count = elms_->length();	3382 int element_count = elms_->length();

3405 for (int i = 0; i < element_count; i++) {	3383 for (int i = 0; i < element_count; i++) {

3406 TextElement elm = elms_->at(i);	3384 TextElement elm = elms_->at(i);

3407 if (elm.text_type() == TextElement::CHAR_CLASS) {	3385 if (elm.text_type() == TextElement::CHAR_CLASS) {

3408 RegExpCharacterClass* cc = elm.char_class();	3386 RegExpCharacterClass* cc = elm.char_class();

3409 // None of the standard character classes is different in the case	3387 // None of the standard character classes is different in the case

3410 // independent case and it slows us down if we don't know that.	3388 // independent case and it slows us down if we don't know that.

3411 if (cc->is_standard(zone())) continue;	3389 if (cc->is_standard(zone())) continue;

3412 ZoneList<CharacterRange>* ranges = cc->ranges(zone());	3390 ZoneList<CharacterRange>* ranges = cc->ranges(zone());

3413 int range_count = ranges->length();	3391 int range_count = ranges->length();

3414 for (int j = 0; j < range_count; j++) {	3392 for (int j = 0; j < range_count; j++) {

3415 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());	3393 ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());

3416 }	3394 }

3417 }	3395 }

3418 }	3396 }

3419 }	3397 }

3420	3398

3421	3399

3422 int TextNode::GreedyLoopTextLength() {	3400 int TextNode::GreedyLoopTextLength() {

3423 TextElement elm = elms_->at(elms_->length() - 1);	3401 TextElement elm = elms_->at(elms_->length() - 1);

3424 return elm.cp_offset() + elm.length();	3402 return elm.cp_offset() + elm.length();

3425 }	3403 }

3426	3404

3427	3405

3428 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(	3406 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(

3429 RegExpCompiler* compiler) {	3407 RegExpCompiler* compiler) {

3430 if (elms_->length() != 1) return NULL;	3408 if (elms_->length() != 1) return NULL;

3431 TextElement elm = elms_->at(0);	3409 TextElement elm = elms_->at(0);

3432 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;	3410 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;

3433 RegExpCharacterClass* node = elm.char_class();	3411 RegExpCharacterClass* node = elm.char_class();

3434 ZoneList<CharacterRange>* ranges = node->ranges(zone());	3412 ZoneList<CharacterRange>* ranges = node->ranges(zone());

3435 if (!CharacterRange::IsCanonical(ranges)) {	3413 if (!CharacterRange::IsCanonical(ranges)) {

3436 CharacterRange::Canonicalize(ranges);	3414 CharacterRange::Canonicalize(ranges);

3437 }	3415 }

3438 if (node->is_negated()) {	3416 if (node->is_negated()) {

3439 return ranges->length() == 0 ? on_success() : NULL;	3417 return ranges->length() == 0 ? on_success() : NULL;

3440 }	3418 }

3441 if (ranges->length() != 1) return NULL;	3419 if (ranges->length() != 1) return NULL;

3442 uint32_t max_char;	3420 uint32_t max_char;

3443 if (compiler->ascii()) {	3421 if (compiler->one_byte()) {

3444 max_char = String::kMaxOneByteCharCode;	3422 max_char = String::kMaxOneByteCharCode;

3445 } else {	3423 } else {

3446 max_char = String::kMaxUtf16CodeUnit;	3424 max_char = String::kMaxUtf16CodeUnit;

3447 }	3425 }

3448 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;	3426 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;

3449 }	3427 }

3450	3428

3451	3429

3452 // Finds the fixed match length of a sequence of nodes that goes from	3430 // Finds the fixed match length of a sequence of nodes that goes from

3453 // this alternative and back to this choice node. If there are variable	3431 // this alternative and back to this choice node. If there are variable

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3510 return;	3488 return;

3511 }	3489 }

3512 ChoiceNode::Emit(compiler, trace);	3490 ChoiceNode::Emit(compiler, trace);

3513 }	3491 }

3514	3492

3515	3493

3516 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,	3494 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,

3517 int eats_at_least) {	3495 int eats_at_least) {

3518 int preload_characters = Min(4, eats_at_least);	3496 int preload_characters = Min(4, eats_at_least);

3519 if (compiler->macro_assembler()->CanReadUnaligned()) {	3497 if (compiler->macro_assembler()->CanReadUnaligned()) {

3520 bool ascii = compiler->ascii();	3498 bool one_byte = compiler->one_byte();

3521 if (ascii) {	3499 if (one_byte) {

3522 if (preload_characters > 4) preload_characters = 4;	3500 if (preload_characters > 4) preload_characters = 4;

3523 // We can't preload 3 characters because there is no machine instruction	3501 // We can't preload 3 characters because there is no machine instruction

3524 // to do that. We can't just load 4 because we could be reading	3502 // to do that. We can't just load 4 because we could be reading

3525 // beyond the end of the string, which could cause a memory fault.	3503 // beyond the end of the string, which could cause a memory fault.

3526 if (preload_characters == 3) preload_characters = 2;	3504 if (preload_characters == 3) preload_characters = 2;

3527 } else {	3505 } else {

3528 if (preload_characters > 2) preload_characters = 2;	3506 if (preload_characters > 2) preload_characters = 2;

3529 }	3507 }

3530 } else {	3508 } else {

3531 if (preload_characters > 1) preload_characters = 1;	3509 if (preload_characters > 1) preload_characters = 1;

(...skipping 105 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3637 map_count_ = kMapSize;	3615 map_count_ = kMapSize;

3638 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;	3616 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;

3639 }	3617 }

3640 }	3618 }

3641	3619

3642	3620

3643 BoyerMooreLookahead::BoyerMooreLookahead(	3621 BoyerMooreLookahead::BoyerMooreLookahead(

3644 int length, RegExpCompiler* compiler, Zone* zone)	3622 int length, RegExpCompiler* compiler, Zone* zone)

3645 : length_(length),	3623 : length_(length),

3646 compiler_(compiler) {	3624 compiler_(compiler) {

3647 if (compiler->ascii()) {	3625 if (compiler->one_byte()) {

3648 max_char_ = String::kMaxOneByteCharCode;	3626 max_char_ = String::kMaxOneByteCharCode;

3649 } else {	3627 } else {

3650 max_char_ = String::kMaxUtf16CodeUnit;	3628 max_char_ = String::kMaxUtf16CodeUnit;

3651 }	3629 }

3652 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);	3630 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);

3653 for (int i = 0; i < length; i++) {	3631 for (int i = 0; i < length; i++) {

3654 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);	3632 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);

3655 }	3633 }

3656 }	3634 }

3657	3635

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3705 // can theoretically be up to 2*kSize though we treat it mostly as	3683 // can theoretically be up to 2*kSize though we treat it mostly as

3706 // a fraction of kSize.	3684 // a fraction of kSize.

3707 frequency += compiler_->frequency_collator()->Frequency(j) + 1;	3685 frequency += compiler_->frequency_collator()->Frequency(j) + 1;

3708 }	3686 }

3709 }	3687 }

3710 // We use the probability of skipping times the distance we are skipping to	3688 // We use the probability of skipping times the distance we are skipping to

3711 // judge the effectiveness of this. Actually we have a cut-off: By	3689 // judge the effectiveness of this. Actually we have a cut-off: By

3712 // dividing by 2 we switch off the skipping if the probability of skipping	3690 // dividing by 2 we switch off the skipping if the probability of skipping

3713 // is less than 50%. This is because the multibyte mask-and-compare	3691 // is less than 50%. This is because the multibyte mask-and-compare

3714 // skipping in quickcheck is more likely to do well on this case.	3692 // skipping in quickcheck is more likely to do well on this case.

3715 bool in_quickcheck_range = ((i - remembered_from < 4) \|\|	3693 bool in_quickcheck_range =

3716 (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));	3694 ((i - remembered_from < 4) \|\|

	3695 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));

3717 // Called 'probability' but it is only a rough estimate and can actually	3696 // Called 'probability' but it is only a rough estimate and can actually

3718 // be outside the 0-kSize range.	3697 // be outside the 0-kSize range.

3719 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;	3698 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;

3720 int points = (i - remembered_from) * probability;	3699 int points = (i - remembered_from) * probability;

3721 if (points > biggest_points) {	3700 if (points > biggest_points) {

3722 *from = remembered_from;	3701 *from = remembered_from;

3723 *to = i - 1;	3702 *to = i - 1;

3724 biggest_points = points;	3703 biggest_points = points;

3725 }	3704 }

3726 }	3705 }

(...skipping 197 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3924 #endif	3903 #endif

3925 }	3904 }

3926	3905

3927	3906

3928 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,	3907 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,

3929 Trace* current_trace,	3908 Trace* current_trace,

3930 PreloadState* state) {	3909 PreloadState* state) {

3931 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {	3910 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {

3932 // Save some time by looking at most one machine word ahead.	3911 // Save some time by looking at most one machine word ahead.

3933 state->eats_at_least_ =	3912 state->eats_at_least_ =

3934 EatsAtLeast(compiler->ascii() ? 4 : 2,	3913 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,

3935 kRecursionBudget,

3936 current_trace->at_start() == Trace::FALSE_VALUE);	3914 current_trace->at_start() == Trace::FALSE_VALUE);

3937 }	3915 }

3938 state->preload_characters_ =	3916 state->preload_characters_ =

3939 CalculatePreloadCharacters(compiler, state->eats_at_least_);	3917 CalculatePreloadCharacters(compiler, state->eats_at_least_);

3940	3918

3941 state->preload_is_current_ =	3919 state->preload_is_current_ =

3942 (current_trace->characters_preloaded() == state->preload_characters_);	3920 (current_trace->characters_preloaded() == state->preload_characters_);

3943 state->preload_has_checked_bounds_ = state->preload_is_current_;	3921 state->preload_has_checked_bounds_ = state->preload_is_current_;

3944 }	3922 }

3945	3923

(...skipping 1394 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5340 for (int i = 0; i < overlay.length(); i += 2) {	5318 for (int i = 0; i < overlay.length(); i += 2) {

5341 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),	5319 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),

5342 CharacterRangeSplitter::kInOverlay, zone);	5320 CharacterRangeSplitter::kInOverlay, zone);

5343 }	5321 }

5344 CharacterRangeSplitter callback(included, excluded, zone);	5322 CharacterRangeSplitter callback(included, excluded, zone);

5345 table.ForEach(&callback);	5323 table.ForEach(&callback);

5346 }	5324 }

5347	5325

5348	5326

5349 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,	5327 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

5350 bool is_ascii,	5328 bool is_one_byte, Zone* zone) {

5351 Zone* zone) {

5352 Isolate* isolate = zone->isolate();	5329 Isolate* isolate = zone->isolate();

5353 uc16 bottom = from();	5330 uc16 bottom = from();

5354 uc16 top = to();	5331 uc16 top = to();

5355 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {	5332 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {

5356 if (bottom > String::kMaxOneByteCharCode) return;	5333 if (bottom > String::kMaxOneByteCharCode) return;

5357 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;	5334 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;

5358 }	5335 }

5359 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5336 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5360 if (top == bottom) {	5337 if (top == bottom) {

5361 // If this is a singleton we just expand the one character.	5338 // If this is a singleton we just expand the one character.

5362 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);	5339 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);

5363 for (int i = 0; i < length; i++) {	5340 for (int i = 0; i < length; i++) {

5364 uc32 chr = chars[i];	5341 uc32 chr = chars[i];

5365 if (chr != bottom) {	5342 if (chr != bottom) {

(...skipping 398 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5764 for (int i = 0; i < element_count; i++) {	5741 for (int i = 0; i < element_count; i++) {

5765 TextElement& elm = elements()->at(i);	5742 TextElement& elm = elements()->at(i);

5766 elm.set_cp_offset(cp_offset);	5743 elm.set_cp_offset(cp_offset);

5767 cp_offset += elm.length();	5744 cp_offset += elm.length();

5768 }	5745 }

5769 }	5746 }

5770	5747

5771	5748

5772 void Analysis::VisitText(TextNode* that) {	5749 void Analysis::VisitText(TextNode* that) {

5773 if (ignore_case_) {	5750 if (ignore_case_) {

5774 that->MakeCaseIndependent(is_ascii_);	5751 that->MakeCaseIndependent(is_one_byte_);

5775 }	5752 }

5776 EnsureAnalyzed(that->on_success());	5753 EnsureAnalyzed(that->on_success());

5777 if (!has_failed()) {	5754 if (!has_failed()) {

5778 that->CalculateOffsets();	5755 that->CalculateOffsets();

5779 }	5756 }

5780 }	5757 }

5781	5758

5782	5759

5783 void Analysis::VisitAction(ActionNode* that) {	5760 void Analysis::VisitAction(ActionNode* that) {

5784 RegExpNode* target = that->on_success();	5761 RegExpNode* target = that->on_success();

(...skipping 255 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6040 }	6017 }

6041	6018

6042	6019

6043 void DispatchTableConstructor::VisitAction(ActionNode* that) {	6020 void DispatchTableConstructor::VisitAction(ActionNode* that) {

6044 RegExpNode* target = that->on_success();	6021 RegExpNode* target = that->on_success();

6045 target->Accept(this);	6022 target->Accept(this);

6046 }	6023 }

6047	6024

6048	6025

6049 RegExpEngine::CompilationResult RegExpEngine::Compile(	6026 RegExpEngine::CompilationResult RegExpEngine::Compile(

6050 RegExpCompileData* data,	6027 RegExpCompileData* data, bool ignore_case, bool is_global,

6051 bool ignore_case,	6028 bool is_multiline, Handle<String> pattern, Handle<String> sample_subject,

6052 bool is_global,	6029 bool is_one_byte, Zone* zone) {

6053 bool is_multiline,

6054 Handle<String> pattern,

6055 Handle<String> sample_subject,

6056 bool is_ascii,

6057 Zone* zone) {

6058 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {	6030 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {

6059 return IrregexpRegExpTooBig(zone->isolate());	6031 return IrregexpRegExpTooBig(zone->isolate());

6060 }	6032 }

6061 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);	6033 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);

6062	6034

6063 // Sample some characters from the middle of the string.	6035 // Sample some characters from the middle of the string.

6064 static const int kSampleSize = 128;	6036 static const int kSampleSize = 128;

6065	6037

6066 sample_subject = String::Flatten(sample_subject);	6038 sample_subject = String::Flatten(sample_subject);

6067 int chars_sampled = 0;	6039 int chars_sampled = 0;

6068 int half_way = (sample_subject->length() - kSampleSize) / 2;	6040 int half_way = (sample_subject->length() - kSampleSize) / 2;

6069 for (int i = Max(0, half_way);	6041 for (int i = Max(0, half_way);

6070 i < sample_subject->length() && chars_sampled < kSampleSize;	6042 i < sample_subject->length() && chars_sampled < kSampleSize;

6071 i++, chars_sampled++) {	6043 i++, chars_sampled++) {

(...skipping 26 matching lines...) Expand all Loading...
6098 // at the start of input.	6070 // at the start of input.

6099 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);	6071 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);

6100 first_step_node->AddAlternative(GuardedAlternative(captured_body));	6072 first_step_node->AddAlternative(GuardedAlternative(captured_body));

6101 first_step_node->AddAlternative(GuardedAlternative(	6073 first_step_node->AddAlternative(GuardedAlternative(

6102 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));	6074 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));

6103 node = first_step_node;	6075 node = first_step_node;

6104 } else {	6076 } else {

6105 node = loop_node;	6077 node = loop_node;

6106 }	6078 }

6107 }	6079 }

6108 if (is_ascii) {	6080 if (is_one_byte) {

6109 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);	6081 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);

6110 // Do it again to propagate the new nodes to places where they were not	6082 // Do it again to propagate the new nodes to places where they were not

6111 // put because they had not been calculated yet.	6083 // put because they had not been calculated yet.

6112 if (node != NULL) {	6084 if (node != NULL) {

6113 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);	6085 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);

6114 }	6086 }

6115 }	6087 }

6116	6088

6117 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);	6089 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);

6118 data->node = node;	6090 data->node = node;

6119 Analysis analysis(ignore_case, is_ascii);	6091 Analysis analysis(ignore_case, is_one_byte);

6120 analysis.EnsureAnalyzed(node);	6092 analysis.EnsureAnalyzed(node);

6121 if (analysis.has_failed()) {	6093 if (analysis.has_failed()) {

6122 const char* error_message = analysis.error_message();	6094 const char* error_message = analysis.error_message();

6123 return CompilationResult(zone->isolate(), error_message);	6095 return CompilationResult(zone->isolate(), error_message);

6124 }	6096 }

6125	6097

6126 // Create the correct assembler for the architecture.	6098 // Create the correct assembler for the architecture.

6127 #ifndef V8_INTERPRETED_REGEXP	6099 #ifndef V8_INTERPRETED_REGEXP

6128 // Native regexp implementation.	6100 // Native regexp implementation.

6129	6101

6130 NativeRegExpMacroAssembler::Mode mode =	6102 NativeRegExpMacroAssembler::Mode mode =

6131 is_ascii ? NativeRegExpMacroAssembler::ASCII	6103 is_one_byte ? NativeRegExpMacroAssembler::LATIN1

6132 : NativeRegExpMacroAssembler::UC16;	6104 : NativeRegExpMacroAssembler::UC16;

6133	6105

6134 #if V8_TARGET_ARCH_IA32	6106 #if V8_TARGET_ARCH_IA32

6135 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,	6107 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,

6136 zone);	6108 zone);

6137 #elif V8_TARGET_ARCH_X64	6109 #elif V8_TARGET_ARCH_X64

6138 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,	6110 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,

6139 zone);	6111 zone);

6140 #elif V8_TARGET_ARCH_ARM	6112 #elif V8_TARGET_ARCH_ARM

6141 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,	6113 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,

6142 zone);	6114 zone);

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6179 }	6151 }

6180	6152

6181 return compiler.Assemble(&macro_assembler,	6153 return compiler.Assemble(&macro_assembler,

6182 node,	6154 node,

6183 data->capture_count,	6155 data->capture_count,

6184 pattern);	6156 pattern);

6185 }	6157 }

6186	6158

6187	6159

6188 }} // namespace v8::internal	6160 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »