Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(39)

Side by Side Diff: src/jsregexp.cc

Issue 559913002: Rename ascii to one-byte where applicable. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/ast.h" 7 #include "src/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after
283 if (index + needle_len > subject->length()) { 283 if (index + needle_len > subject->length()) {
284 return RegExpImpl::RE_FAILURE; 284 return RegExpImpl::RE_FAILURE;
285 } 285 }
286 286
287 for (int i = 0; i < output_size; i += 2) { 287 for (int i = 0; i < output_size; i += 2) {
288 String::FlatContent needle_content = needle->GetFlatContent(); 288 String::FlatContent needle_content = needle->GetFlatContent();
289 String::FlatContent subject_content = subject->GetFlatContent(); 289 String::FlatContent subject_content = subject->GetFlatContent();
290 DCHECK(needle_content.IsFlat()); 290 DCHECK(needle_content.IsFlat());
291 DCHECK(subject_content.IsFlat()); 291 DCHECK(subject_content.IsFlat());
292 // dispatch on type of strings 292 // dispatch on type of strings
293 index = (needle_content.IsAscii() 293 index =
294 ? (subject_content.IsAscii() 294 (needle_content.IsOneByte()
295 ? SearchString(isolate, 295 ? (subject_content.IsOneByte()
296 subject_content.ToOneByteVector(), 296 ? SearchString(isolate, subject_content.ToOneByteVector(),
297 needle_content.ToOneByteVector(), 297 needle_content.ToOneByteVector(), index)
298 index) 298 : SearchString(isolate, subject_content.ToUC16Vector(),
299 : SearchString(isolate, 299 needle_content.ToOneByteVector(), index))
300 subject_content.ToUC16Vector(), 300 : (subject_content.IsOneByte()
301 needle_content.ToOneByteVector(), 301 ? SearchString(isolate, subject_content.ToOneByteVector(),
302 index)) 302 needle_content.ToUC16Vector(), index)
303 : (subject_content.IsAscii() 303 : SearchString(isolate, subject_content.ToUC16Vector(),
304 ? SearchString(isolate, 304 needle_content.ToUC16Vector(), index)));
305 subject_content.ToOneByteVector(),
306 needle_content.ToUC16Vector(),
307 index)
308 : SearchString(isolate,
309 subject_content.ToUC16Vector(),
310 needle_content.ToUC16Vector(),
311 index)));
312 if (index == -1) { 305 if (index == -1) {
313 return i / 2; // Return number of matches. 306 return i / 2; // Return number of matches.
314 } else { 307 } else {
315 output[i] = index; 308 output[i] = index;
316 output[i+1] = index + needle_len; 309 output[i+1] = index + needle_len;
317 index += needle_len; 310 index += needle_len;
318 } 311 }
319 } 312 }
320 return output_size / 2; 313 return output_size / 2;
321 } 314 }
(...skipping 17 matching lines...) Expand all
339 SealHandleScope shs(isolate); 332 SealHandleScope shs(isolate);
340 FixedArray* array = FixedArray::cast(last_match_info->elements()); 333 FixedArray* array = FixedArray::cast(last_match_info->elements());
341 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]); 334 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
342 return last_match_info; 335 return last_match_info;
343 } 336 }
344 337
345 338
346 // Irregexp implementation. 339 // Irregexp implementation.
347 340
348 // Ensures that the regexp object contains a compiled version of the 341 // Ensures that the regexp object contains a compiled version of the
349 // source for either ASCII or non-ASCII strings. 342 // source for either one-byte or two-byte subject strings.
350 // If the compiled version doesn't already exist, it is compiled 343 // If the compiled version doesn't already exist, it is compiled
351 // from the source pattern. 344 // from the source pattern.
352 // If compilation fails, an exception is thrown and this function 345 // If compilation fails, an exception is thrown and this function
353 // returns false. 346 // returns false.
354 bool RegExpImpl::EnsureCompiledIrregexp( 347 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
355 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) { 348 Handle<String> sample_subject,
356 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii)); 349 bool is_one_byte) {
350 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
357 #ifdef V8_INTERPRETED_REGEXP 351 #ifdef V8_INTERPRETED_REGEXP
358 if (compiled_code->IsByteArray()) return true; 352 if (compiled_code->IsByteArray()) return true;
359 #else // V8_INTERPRETED_REGEXP (RegExp native code) 353 #else // V8_INTERPRETED_REGEXP (RegExp native code)
360 if (compiled_code->IsCode()) return true; 354 if (compiled_code->IsCode()) return true;
361 #endif 355 #endif
362 // We could potentially have marked this as flushable, but have kept 356 // We could potentially have marked this as flushable, but have kept
363 // a saved version if we did not flush it yet. 357 // a saved version if we did not flush it yet.
364 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii)); 358 Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
365 if (saved_code->IsCode()) { 359 if (saved_code->IsCode()) {
366 // Reinstate the code in the original place. 360 // Reinstate the code in the original place.
367 re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code); 361 re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);
368 DCHECK(compiled_code->IsSmi()); 362 DCHECK(compiled_code->IsSmi());
369 return true; 363 return true;
370 } 364 }
371 return CompileIrregexp(re, sample_subject, is_ascii); 365 return CompileIrregexp(re, sample_subject, is_one_byte);
372 } 366 }
373 367
374 368
375 static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, bool is_ascii, 369 static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
376 Handle<String> error_message, 370 Handle<String> error_message,
377 Isolate* isolate) { 371 Isolate* isolate) {
378 Factory* factory = isolate->factory(); 372 Factory* factory = isolate->factory();
379 Handle<FixedArray> elements = factory->NewFixedArray(2); 373 Handle<FixedArray> elements = factory->NewFixedArray(2);
380 elements->set(0, re->Pattern()); 374 elements->set(0, re->Pattern());
381 elements->set(1, *error_message); 375 elements->set(1, *error_message);
382 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); 376 Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
383 Handle<Object> error; 377 Handle<Object> error;
384 MaybeHandle<Object> maybe_error = 378 MaybeHandle<Object> maybe_error =
385 factory->NewSyntaxError("malformed_regexp", array); 379 factory->NewSyntaxError("malformed_regexp", array);
386 if (maybe_error.ToHandle(&error)) isolate->Throw(*error); 380 if (maybe_error.ToHandle(&error)) isolate->Throw(*error);
387 } 381 }
388 382
389 383
390 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, 384 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
391 Handle<String> sample_subject, 385 Handle<String> sample_subject,
392 bool is_ascii) { 386 bool is_one_byte) {
393 // Compile the RegExp. 387 // Compile the RegExp.
394 Isolate* isolate = re->GetIsolate(); 388 Isolate* isolate = re->GetIsolate();
395 Zone zone(isolate); 389 Zone zone(isolate);
396 PostponeInterruptsScope postpone(isolate); 390 PostponeInterruptsScope postpone(isolate);
397 // If we had a compilation error the last time this is saved at the 391 // If we had a compilation error the last time this is saved at the
398 // saved code index. 392 // saved code index.
399 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); 393 Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));
400 // When arriving here entry can only be a smi, either representing an 394 // When arriving here entry can only be a smi, either representing an
401 // uncompiled regexp, a previous compilation error, or code that has 395 // uncompiled regexp, a previous compilation error, or code that has
402 // been flushed. 396 // been flushed.
403 DCHECK(entry->IsSmi()); 397 DCHECK(entry->IsSmi());
404 int entry_value = Smi::cast(entry)->value(); 398 int entry_value = Smi::cast(entry)->value();
405 DCHECK(entry_value == JSRegExp::kUninitializedValue || 399 DCHECK(entry_value == JSRegExp::kUninitializedValue ||
406 entry_value == JSRegExp::kCompilationErrorValue || 400 entry_value == JSRegExp::kCompilationErrorValue ||
407 (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0)); 401 (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
408 402
409 if (entry_value == JSRegExp::kCompilationErrorValue) { 403 if (entry_value == JSRegExp::kCompilationErrorValue) {
410 // A previous compilation failed and threw an error which we store in 404 // A previous compilation failed and threw an error which we store in
411 // the saved code index (we store the error message, not the actual 405 // the saved code index (we store the error message, not the actual
412 // error). Recreate the error object and throw it. 406 // error). Recreate the error object and throw it.
413 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); 407 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
414 DCHECK(error_string->IsString()); 408 DCHECK(error_string->IsString());
415 Handle<String> error_message(String::cast(error_string)); 409 Handle<String> error_message(String::cast(error_string));
416 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 410 CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
417 return false; 411 return false;
418 } 412 }
419 413
420 JSRegExp::Flags flags = re->GetFlags(); 414 JSRegExp::Flags flags = re->GetFlags();
421 415
422 Handle<String> pattern(re->Pattern()); 416 Handle<String> pattern(re->Pattern());
423 pattern = String::Flatten(pattern); 417 pattern = String::Flatten(pattern);
424 RegExpCompileData compile_data; 418 RegExpCompileData compile_data;
425 FlatStringReader reader(isolate, pattern); 419 FlatStringReader reader(isolate, pattern);
426 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 420 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
427 &compile_data, 421 &compile_data,
428 &zone)) { 422 &zone)) {
429 // Throw an exception if we fail to parse the pattern. 423 // Throw an exception if we fail to parse the pattern.
430 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 424 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
431 USE(ThrowRegExpException(re, 425 USE(ThrowRegExpException(re,
432 pattern, 426 pattern,
433 compile_data.error, 427 compile_data.error,
434 "malformed_regexp")); 428 "malformed_regexp"));
435 return false; 429 return false;
436 } 430 }
437 RegExpEngine::CompilationResult result = 431 RegExpEngine::CompilationResult result = RegExpEngine::Compile(
438 RegExpEngine::Compile(&compile_data, 432 &compile_data, flags.is_ignore_case(), flags.is_global(),
439 flags.is_ignore_case(), 433 flags.is_multiline(), pattern, sample_subject, is_one_byte, &zone);
440 flags.is_global(),
441 flags.is_multiline(),
442 pattern,
443 sample_subject,
444 is_ascii,
445 &zone);
446 if (result.error_message != NULL) { 434 if (result.error_message != NULL) {
447 // Unable to compile regexp. 435 // Unable to compile regexp.
448 Handle<String> error_message = isolate->factory()->NewStringFromUtf8( 436 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
449 CStrVector(result.error_message)).ToHandleChecked(); 437 CStrVector(result.error_message)).ToHandleChecked();
450 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 438 CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
451 return false; 439 return false;
452 } 440 }
453 441
454 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); 442 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
455 data->set(JSRegExp::code_index(is_ascii), result.code); 443 data->set(JSRegExp::code_index(is_one_byte), result.code);
456 int register_max = IrregexpMaxRegisterCount(*data); 444 int register_max = IrregexpMaxRegisterCount(*data);
457 if (result.num_registers > register_max) { 445 if (result.num_registers > register_max) {
458 SetIrregexpMaxRegisterCount(*data, result.num_registers); 446 SetIrregexpMaxRegisterCount(*data, result.num_registers);
459 } 447 }
460 448
461 return true; 449 return true;
462 } 450 }
463 451
464 452
465 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { 453 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {
(...skipping 10 matching lines...) Expand all
476 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { 464 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
477 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); 465 return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
478 } 466 }
479 467
480 468
481 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { 469 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
482 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); 470 return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
483 } 471 }
484 472
485 473
486 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { 474 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) {
487 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); 475 return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));
488 } 476 }
489 477
490 478
491 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { 479 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) {
492 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); 480 return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
493 } 481 }
494 482
495 483
496 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, 484 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
497 Handle<String> pattern, 485 Handle<String> pattern,
498 JSRegExp::Flags flags, 486 JSRegExp::Flags flags,
499 int capture_count) { 487 int capture_count) {
500 // Initialize compiled code entries to null. 488 // Initialize compiled code entries to null.
501 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, 489 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
502 JSRegExp::IRREGEXP, 490 JSRegExp::IRREGEXP,
503 pattern, 491 pattern,
504 flags, 492 flags,
505 capture_count); 493 capture_count);
506 } 494 }
507 495
508 496
509 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 497 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
510 Handle<String> subject) { 498 Handle<String> subject) {
511 subject = String::Flatten(subject); 499 subject = String::Flatten(subject);
512 500
513 // Check the asciiness of the underlying storage. 501 // Check representation of the underlying storage.
514 bool is_ascii = subject->IsOneByteRepresentationUnderneath(); 502 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
515 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; 503 if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;
516 504
517 #ifdef V8_INTERPRETED_REGEXP 505 #ifdef V8_INTERPRETED_REGEXP
518 // Byte-code regexp needs space allocated for all its registers. 506 // Byte-code regexp needs space allocated for all its registers.
519 // The result captures are copied to the start of the registers array 507 // The result captures are copied to the start of the registers array
520 // if the match succeeds. This way those registers are not clobbered 508 // if the match succeeds. This way those registers are not clobbered
521 // when we set the last match info from last successful match. 509 // when we set the last match info from last successful match.
522 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) + 510 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
523 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 511 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
524 #else // V8_INTERPRETED_REGEXP 512 #else // V8_INTERPRETED_REGEXP
525 // Native regexp only needs room to output captures. Registers are handled 513 // Native regexp only needs room to output captures. Registers are handled
526 // internally. 514 // internally.
527 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 515 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
528 #endif // V8_INTERPRETED_REGEXP 516 #endif // V8_INTERPRETED_REGEXP
529 } 517 }
530 518
531 519
532 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp, 520 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
533 Handle<String> subject, 521 Handle<String> subject,
534 int index, 522 int index,
535 int32_t* output, 523 int32_t* output,
536 int output_size) { 524 int output_size) {
537 Isolate* isolate = regexp->GetIsolate(); 525 Isolate* isolate = regexp->GetIsolate();
538 526
539 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 527 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
540 528
541 DCHECK(index >= 0); 529 DCHECK(index >= 0);
542 DCHECK(index <= subject->length()); 530 DCHECK(index <= subject->length());
543 DCHECK(subject->IsFlat()); 531 DCHECK(subject->IsFlat());
544 532
545 bool is_ascii = subject->IsOneByteRepresentationUnderneath(); 533 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
546 534
547 #ifndef V8_INTERPRETED_REGEXP 535 #ifndef V8_INTERPRETED_REGEXP
548 DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 536 DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
549 do { 537 do {
550 EnsureCompiledIrregexp(regexp, subject, is_ascii); 538 EnsureCompiledIrregexp(regexp, subject, is_one_byte);
551 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); 539 Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
552 // The stack is used to allocate registers for the compiled regexp code. 540 // The stack is used to allocate registers for the compiled regexp code.
553 // This means that in case of failure, the output registers array is left 541 // This means that in case of failure, the output registers array is left
554 // untouched and contains the capture results from the previous successful 542 // untouched and contains the capture results from the previous successful
555 // match. We can use that to set the last match info lazily. 543 // match. We can use that to set the last match info lazily.
556 NativeRegExpMacroAssembler::Result res = 544 NativeRegExpMacroAssembler::Result res =
557 NativeRegExpMacroAssembler::Match(code, 545 NativeRegExpMacroAssembler::Match(code,
558 subject, 546 subject,
559 output, 547 output,
560 output_size, 548 output_size,
561 index, 549 index,
562 isolate); 550 isolate);
563 if (res != NativeRegExpMacroAssembler::RETRY) { 551 if (res != NativeRegExpMacroAssembler::RETRY) {
564 DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION || 552 DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||
565 isolate->has_pending_exception()); 553 isolate->has_pending_exception());
566 STATIC_ASSERT( 554 STATIC_ASSERT(
567 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); 555 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
568 STATIC_ASSERT( 556 STATIC_ASSERT(
569 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); 557 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
570 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) 558 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
571 == RE_EXCEPTION); 559 == RE_EXCEPTION);
572 return static_cast<IrregexpResult>(res); 560 return static_cast<IrregexpResult>(res);
573 } 561 }
574 // If result is RETRY, the string has changed representation, and we 562 // If result is RETRY, the string has changed representation, and we
575 // must restart from scratch. 563 // must restart from scratch.
576 // In this case, it means we must make sure we are prepared to handle 564 // In this case, it means we must make sure we are prepared to handle
577 // the, potentially, different subject (the string can switch between 565 // the, potentially, different subject (the string can switch between
578 // being internal and external, and even between being ASCII and UC16, 566 // being internal and external, and even between being Latin1 and UC16,
579 // but the characters are always the same). 567 // but the characters are always the same).
580 IrregexpPrepare(regexp, subject); 568 IrregexpPrepare(regexp, subject);
581 is_ascii = subject->IsOneByteRepresentationUnderneath(); 569 is_one_byte = subject->IsOneByteRepresentationUnderneath();
582 } while (true); 570 } while (true);
583 UNREACHABLE(); 571 UNREACHABLE();
584 return RE_EXCEPTION; 572 return RE_EXCEPTION;
585 #else // V8_INTERPRETED_REGEXP 573 #else // V8_INTERPRETED_REGEXP
586 574
587 DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp)); 575 DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
588 // We must have done EnsureCompiledIrregexp, so we can get the number of 576 // We must have done EnsureCompiledIrregexp, so we can get the number of
589 // registers. 577 // registers.
590 int number_of_capture_registers = 578 int number_of_capture_registers =
591 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 579 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
592 int32_t* raw_output = &output[number_of_capture_registers]; 580 int32_t* raw_output = &output[number_of_capture_registers];
593 // We do not touch the actual capture result registers until we know there 581 // We do not touch the actual capture result registers until we know there
594 // has been a match so that we can use those capture results to set the 582 // has been a match so that we can use those capture results to set the
595 // last match info. 583 // last match info.
596 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 584 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
597 raw_output[i] = -1; 585 raw_output[i] = -1;
598 } 586 }
599 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); 587 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
588 isolate);
600 589
601 IrregexpResult result = IrregexpInterpreter::Match(isolate, 590 IrregexpResult result = IrregexpInterpreter::Match(isolate,
602 byte_codes, 591 byte_codes,
603 subject, 592 subject,
604 raw_output, 593 raw_output,
605 index); 594 index);
606 if (result == RE_SUCCESS) { 595 if (result == RE_SUCCESS) {
607 // Copy capture results to the start of the registers array. 596 // Copy capture results to the start of the registers array.
608 MemCopy(output, raw_output, number_of_capture_registers * sizeof(int32_t)); 597 MemCopy(output, raw_output, number_of_capture_registers * sizeof(int32_t));
609 } 598 }
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after
990 979
991 980
992 private: 981 private:
993 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 982 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
994 int total_samples_; 983 int total_samples_;
995 }; 984 };
996 985
997 986
998 class RegExpCompiler { 987 class RegExpCompiler {
999 public: 988 public:
1000 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii, 989 RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte,
1001 Zone* zone); 990 Zone* zone);
1002 991
1003 int AllocateRegister() { 992 int AllocateRegister() {
1004 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 993 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
1005 reg_exp_too_big_ = true; 994 reg_exp_too_big_ = true;
1006 return next_register_; 995 return next_register_;
1007 } 996 }
1008 return next_register_++; 997 return next_register_++;
1009 } 998 }
1010 999
(...skipping 12 matching lines...) Expand all
1023 EndNode* accept() { return accept_; } 1012 EndNode* accept() { return accept_; }
1024 1013
1025 static const int kMaxRecursion = 100; 1014 static const int kMaxRecursion = 100;
1026 inline int recursion_depth() { return recursion_depth_; } 1015 inline int recursion_depth() { return recursion_depth_; }
1027 inline void IncrementRecursionDepth() { recursion_depth_++; } 1016 inline void IncrementRecursionDepth() { recursion_depth_++; }
1028 inline void DecrementRecursionDepth() { recursion_depth_--; } 1017 inline void DecrementRecursionDepth() { recursion_depth_--; }
1029 1018
1030 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 1019 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
1031 1020
1032 inline bool ignore_case() { return ignore_case_; } 1021 inline bool ignore_case() { return ignore_case_; }
1033 inline bool ascii() { return ascii_; } 1022 inline bool one_byte() { return one_byte_; }
1034 FrequencyCollator* frequency_collator() { return &frequency_collator_; } 1023 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
1035 1024
1036 int current_expansion_factor() { return current_expansion_factor_; } 1025 int current_expansion_factor() { return current_expansion_factor_; }
1037 void set_current_expansion_factor(int value) { 1026 void set_current_expansion_factor(int value) {
1038 current_expansion_factor_ = value; 1027 current_expansion_factor_ = value;
1039 } 1028 }
1040 1029
1041 Zone* zone() const { return zone_; } 1030 Zone* zone() const { return zone_; }
1042 1031
1043 static const int kNoRegister = -1; 1032 static const int kNoRegister = -1;
1044 1033
1045 private: 1034 private:
1046 EndNode* accept_; 1035 EndNode* accept_;
1047 int next_register_; 1036 int next_register_;
1048 List<RegExpNode*>* work_list_; 1037 List<RegExpNode*>* work_list_;
1049 int recursion_depth_; 1038 int recursion_depth_;
1050 RegExpMacroAssembler* macro_assembler_; 1039 RegExpMacroAssembler* macro_assembler_;
1051 bool ignore_case_; 1040 bool ignore_case_;
1052 bool ascii_; 1041 bool one_byte_;
1053 bool reg_exp_too_big_; 1042 bool reg_exp_too_big_;
1054 int current_expansion_factor_; 1043 int current_expansion_factor_;
1055 FrequencyCollator frequency_collator_; 1044 FrequencyCollator frequency_collator_;
1056 Zone* zone_; 1045 Zone* zone_;
1057 }; 1046 };
1058 1047
1059 1048
1060 class RecursionCheck { 1049 class RecursionCheck {
1061 public: 1050 public:
1062 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1051 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1063 compiler->IncrementRecursionDepth(); 1052 compiler->IncrementRecursionDepth();
1064 } 1053 }
1065 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 1054 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
1066 private: 1055 private:
1067 RegExpCompiler* compiler_; 1056 RegExpCompiler* compiler_;
1068 }; 1057 };
1069 1058
1070 1059
1071 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) { 1060 static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
1072 return RegExpEngine::CompilationResult(isolate, "RegExp too big"); 1061 return RegExpEngine::CompilationResult(isolate, "RegExp too big");
1073 } 1062 }
1074 1063
1075 1064
1076 // Attempts to compile the regexp using an Irregexp code generator. Returns 1065 // Attempts to compile the regexp using an Irregexp code generator. Returns
1077 // a fixed array or a null handle depending on whether it succeeded. 1066 // a fixed array or a null handle depending on whether it succeeded.
1078 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii, 1067 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,
1079 Zone* zone) 1068 bool one_byte, Zone* zone)
1080 : next_register_(2 * (capture_count + 1)), 1069 : next_register_(2 * (capture_count + 1)),
1081 work_list_(NULL), 1070 work_list_(NULL),
1082 recursion_depth_(0), 1071 recursion_depth_(0),
1083 ignore_case_(ignore_case), 1072 ignore_case_(ignore_case),
1084 ascii_(ascii), 1073 one_byte_(one_byte),
1085 reg_exp_too_big_(false), 1074 reg_exp_too_big_(false),
1086 current_expansion_factor_(1), 1075 current_expansion_factor_(1),
1087 frequency_collator_(), 1076 frequency_collator_(),
1088 zone_(zone) { 1077 zone_(zone) {
1089 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); 1078 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
1090 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 1079 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1091 } 1080 }
1092 1081
1093 1082
1094 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 1083 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
(...skipping 490 matching lines...) Expand 10 before | Expand all | Expand 10 after
1585 macro_assembler->IfRegisterLT(guard->reg(), 1574 macro_assembler->IfRegisterLT(guard->reg(),
1586 guard->value(), 1575 guard->value(),
1587 trace->backtrack()); 1576 trace->backtrack());
1588 break; 1577 break;
1589 } 1578 }
1590 } 1579 }
1591 1580
1592 1581
1593 // Returns the number of characters in the equivalence class, omitting those 1582 // Returns the number of characters in the equivalence class, omitting those
1594 // that cannot occur in the source string because it is ASCII. 1583 // that cannot occur in the source string because it is ASCII.
1595 static int GetCaseIndependentLetters(Isolate* isolate, 1584 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
1596 uc16 character, 1585 bool one_byte_subject,
1597 bool ascii_subject,
1598 unibrow::uchar* letters) { 1586 unibrow::uchar* letters) {
1599 int length = 1587 int length =
1600 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); 1588 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
1601 // Unibrow returns 0 or 1 for characters where case independence is 1589 // Unibrow returns 0 or 1 for characters where case independence is
1602 // trivial. 1590 // trivial.
1603 if (length == 0) { 1591 if (length == 0) {
1604 letters[0] = character; 1592 letters[0] = character;
1605 length = 1; 1593 length = 1;
1606 } 1594 }
1607 if (!ascii_subject || character <= String::kMaxOneByteCharCode) { 1595 if (!one_byte_subject || character <= String::kMaxOneByteCharCode) {
1608 return length; 1596 return length;
1609 } 1597 }
1598
1610 // The standard requires that non-ASCII characters cannot have ASCII 1599 // The standard requires that non-ASCII characters cannot have ASCII
1611 // character codes in their equivalence class. 1600 // character codes in their equivalence class.
1601 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,
1602 // is it? For example, \u00C5 is equivalent to \u212B.
Yang 2014/09/10 08:26:36 This is one of the TODOs I mentioned.
dcarney 2014/09/10 09:35:12 I checked other browsers I think originally, and w
1612 return 0; 1603 return 0;
1613 } 1604 }
1614 1605
1615 1606
1616 static inline bool EmitSimpleCharacter(Isolate* isolate, 1607 static inline bool EmitSimpleCharacter(Isolate* isolate,
1617 RegExpCompiler* compiler, 1608 RegExpCompiler* compiler,
1618 uc16 c, 1609 uc16 c,
1619 Label* on_failure, 1610 Label* on_failure,
1620 int cp_offset, 1611 int cp_offset,
1621 bool check, 1612 bool check,
(...skipping 15 matching lines...) Expand all
1637 // Only emits non-letters (things that don't have case). Only used for case 1628 // Only emits non-letters (things that don't have case). Only used for case
1638 // independent matches. 1629 // independent matches.
1639 static inline bool EmitAtomNonLetter(Isolate* isolate, 1630 static inline bool EmitAtomNonLetter(Isolate* isolate,
1640 RegExpCompiler* compiler, 1631 RegExpCompiler* compiler,
1641 uc16 c, 1632 uc16 c,
1642 Label* on_failure, 1633 Label* on_failure,
1643 int cp_offset, 1634 int cp_offset,
1644 bool check, 1635 bool check,
1645 bool preloaded) { 1636 bool preloaded) {
1646 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1637 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1647 bool ascii = compiler->ascii(); 1638 bool one_byte = compiler->one_byte();
1648 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1639 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1649 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); 1640 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
1650 if (length < 1) { 1641 if (length < 1) {
1651 // This can't match. Must be an ASCII subject and a non-ASCII character. 1642 // This can't match. Must be a one-byte subject and a non-one-byte
1652 // We do not need to do anything since the ASCII pass already handled this. 1643 // character. We do not need to do anything since the one-byte pass
1644 // already handled this.
1653 return false; // Bounds not checked. 1645 return false; // Bounds not checked.
1654 } 1646 }
1655 bool checked = false; 1647 bool checked = false;
1656 // We handle the length > 1 case in a later pass. 1648 // We handle the length > 1 case in a later pass.
1657 if (length == 1) { 1649 if (length == 1) {
1658 if (ascii && c > String::kMaxOneByteCharCodeU) { 1650 if (one_byte && c > String::kMaxOneByteCharCodeU) {
1659 // Can't match - see above. 1651 // Can't match - see above.
1660 return false; // Bounds not checked. 1652 return false; // Bounds not checked.
1661 } 1653 }
1662 if (!preloaded) { 1654 if (!preloaded) {
1663 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1655 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1664 checked = check; 1656 checked = check;
1665 } 1657 }
1666 macro_assembler->CheckNotCharacter(c, on_failure); 1658 macro_assembler->CheckNotCharacter(c, on_failure);
1667 } 1659 }
1668 return checked; 1660 return checked;
1669 } 1661 }
1670 1662
1671 1663
1672 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 1664 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1673 bool ascii, 1665 bool one_byte, uc16 c1, uc16 c2,
1674 uc16 c1,
1675 uc16 c2,
1676 Label* on_failure) { 1666 Label* on_failure) {
1677 uc16 char_mask; 1667 uc16 char_mask;
1678 if (ascii) { 1668 if (one_byte) {
1679 char_mask = String::kMaxOneByteCharCode; 1669 char_mask = String::kMaxOneByteCharCode;
1680 } else { 1670 } else {
1681 char_mask = String::kMaxUtf16CodeUnit; 1671 char_mask = String::kMaxUtf16CodeUnit;
1682 } 1672 }
1683 uc16 exor = c1 ^ c2; 1673 uc16 exor = c1 ^ c2;
1684 // Check whether exor has only one bit set. 1674 // Check whether exor has only one bit set.
1685 if (((exor - 1) & exor) == 0) { 1675 if (((exor - 1) & exor) == 0) {
1686 // If c1 and c2 differ only by one bit. 1676 // If c1 and c2 differ only by one bit.
1687 // Ecma262UnCanonicalize always gives the highest number last. 1677 // Ecma262UnCanonicalize always gives the highest number last.
1688 DCHECK(c2 > c1); 1678 DCHECK(c2 > c1);
(...skipping 30 matching lines...) Expand all
1719 // Only emits letters (things that have case). Only used for case independent 1709 // Only emits letters (things that have case). Only used for case independent
1720 // matches. 1710 // matches.
1721 static inline bool EmitAtomLetter(Isolate* isolate, 1711 static inline bool EmitAtomLetter(Isolate* isolate,
1722 RegExpCompiler* compiler, 1712 RegExpCompiler* compiler,
1723 uc16 c, 1713 uc16 c,
1724 Label* on_failure, 1714 Label* on_failure,
1725 int cp_offset, 1715 int cp_offset,
1726 bool check, 1716 bool check,
1727 bool preloaded) { 1717 bool preloaded) {
1728 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1718 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1729 bool ascii = compiler->ascii(); 1719 bool one_byte = compiler->one_byte();
1730 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1720 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1731 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); 1721 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
1732 if (length <= 1) return false; 1722 if (length <= 1) return false;
1733 // We may not need to check against the end of the input string 1723 // We may not need to check against the end of the input string
1734 // if this character lies before a character that matched. 1724 // if this character lies before a character that matched.
1735 if (!preloaded) { 1725 if (!preloaded) {
1736 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1726 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1737 } 1727 }
1738 Label ok; 1728 Label ok;
1739 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1729 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1740 switch (length) { 1730 switch (length) {
1741 case 2: { 1731 case 2: {
1742 if (ShortCutEmitCharacterPair(macro_assembler, 1732 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
1743 ascii, 1733 chars[1], on_failure)) {
1744 chars[0],
1745 chars[1],
1746 on_failure)) {
1747 } else { 1734 } else {
1748 macro_assembler->CheckCharacter(chars[0], &ok); 1735 macro_assembler->CheckCharacter(chars[0], &ok);
1749 macro_assembler->CheckNotCharacter(chars[1], on_failure); 1736 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1750 macro_assembler->Bind(&ok); 1737 macro_assembler->Bind(&ok);
1751 } 1738 }
1752 break; 1739 break;
1753 } 1740 }
1754 case 4: 1741 case 4:
1755 macro_assembler->CheckCharacter(chars[3], &ok); 1742 macro_assembler->CheckCharacter(chars[3], &ok);
1756 // Fall through! 1743 // Fall through!
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after
1911 1898
1912 *new_start_index = start_index; 1899 *new_start_index = start_index;
1913 *border = (ranges->at(start_index) & ~kMask) + kSize; 1900 *border = (ranges->at(start_index) & ~kMask) + kSize;
1914 while (*new_start_index < end_index) { 1901 while (*new_start_index < end_index) {
1915 if (ranges->at(*new_start_index) > *border) break; 1902 if (ranges->at(*new_start_index) > *border) break;
1916 (*new_start_index)++; 1903 (*new_start_index)++;
1917 } 1904 }
1918 // new_start_index is the index of the first edge that is beyond the 1905 // new_start_index is the index of the first edge that is beyond the
1919 // current kSize space. 1906 // current kSize space.
1920 1907
1921 // For very large search spaces we do a binary chop search of the non-ASCII 1908 // For very large search spaces we do a binary chop search of the non-Latin1
1922 // space instead of just going to the end of the current kSize space. The 1909 // space instead of just going to the end of the current kSize space. The
1923 // heuristics are complicated a little by the fact that any 128-character 1910 // heuristics are complicated a little by the fact that any 128-character
1924 // encoding space can be quickly tested with a table lookup, so we don't 1911 // encoding space can be quickly tested with a table lookup, so we don't
1925 // wish to do binary chop search at a smaller granularity than that. A 1912 // wish to do binary chop search at a smaller granularity than that. A
1926 // 128-character space can take up a lot of space in the ranges array if, 1913 // 128-character space can take up a lot of space in the ranges array if,
1927 // for example, we only want to match every second character (eg. the lower 1914 // for example, we only want to match every second character (eg. the lower
1928 // case characters on some Unicode pages). 1915 // case characters on some Unicode pages).
1929 int binary_chop_index = (end_index + start_index) / 2; 1916 int binary_chop_index = (end_index + start_index) / 2;
1930 // The first test ensures that we get to the code that handles the ASCII 1917 // The first test ensures that we get to the code that handles the Latin1
1931 // range with a single not-taken branch, speeding up this important 1918 // range with a single not-taken branch, speeding up this important
1932 // character range (even non-ASCII charset-based text has spaces and 1919 // character range (even non-Latin1 charset-based text has spaces and
1933 // punctuation). 1920 // punctuation).
1934 if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case. 1921 if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
1935 end_index - start_index > (*new_start_index - start_index) * 2 && 1922 end_index - start_index > (*new_start_index - start_index) * 2 &&
1936 last - first > kSize * 2 && 1923 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1937 binary_chop_index > *new_start_index &&
1938 ranges->at(binary_chop_index) >= first + 2 * kSize) { 1924 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1939 int scan_forward_for_section_border = binary_chop_index;; 1925 int scan_forward_for_section_border = binary_chop_index;;
1940 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; 1926 int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1941 1927
1942 while (scan_forward_for_section_border < end_index) { 1928 while (scan_forward_for_section_border < end_index) {
1943 if (ranges->at(scan_forward_for_section_border) > new_border) { 1929 if (ranges->at(scan_forward_for_section_border) > new_border) {
1944 *new_start_index = scan_forward_for_section_border; 1930 *new_start_index = scan_forward_for_section_border;
1945 *border = new_border; 1931 *border = new_border;
1946 break; 1932 break;
1947 } 1933 }
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after
2114 border, 2100 border,
2115 max_char, 2101 max_char,
2116 &dummy, 2102 &dummy,
2117 flip ? odd_label : even_label, 2103 flip ? odd_label : even_label,
2118 flip ? even_label : odd_label); 2104 flip ? even_label : odd_label);
2119 } 2105 }
2120 } 2106 }
2121 2107
2122 2108
2123 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 2109 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2124 RegExpCharacterClass* cc, 2110 RegExpCharacterClass* cc, bool one_byte,
2125 bool ascii, 2111 Label* on_failure, int cp_offset, bool check_offset,
2126 Label* on_failure, 2112 bool preloaded, Zone* zone) {
2127 int cp_offset,
2128 bool check_offset,
2129 bool preloaded,
2130 Zone* zone) {
2131 ZoneList<CharacterRange>* ranges = cc->ranges(zone); 2113 ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2132 if (!CharacterRange::IsCanonical(ranges)) { 2114 if (!CharacterRange::IsCanonical(ranges)) {
2133 CharacterRange::Canonicalize(ranges); 2115 CharacterRange::Canonicalize(ranges);
2134 } 2116 }
2135 2117
2136 int max_char; 2118 int max_char;
2137 if (ascii) { 2119 if (one_byte) {
2138 max_char = String::kMaxOneByteCharCode; 2120 max_char = String::kMaxOneByteCharCode;
2139 } else { 2121 } else {
2140 max_char = String::kMaxUtf16CodeUnit; 2122 max_char = String::kMaxUtf16CodeUnit;
2141 } 2123 }
2142 2124
2143 int range_count = ranges->length(); 2125 int range_count = ranges->length();
2144 2126
2145 int last_valid_range = range_count - 1; 2127 int last_valid_range = range_count - 1;
2146 while (last_valid_range >= 0) { 2128 while (last_valid_range >= 0) {
2147 CharacterRange& range = ranges->at(last_valid_range); 2129 CharacterRange& range = ranges->at(last_valid_range);
(...skipping 309 matching lines...) Expand 10 before | Expand all | Expand 10 after
2457 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, 2439 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
2458 Trace* trace, 2440 Trace* trace,
2459 bool preload_has_checked_bounds, 2441 bool preload_has_checked_bounds,
2460 Label* on_possible_success, 2442 Label* on_possible_success,
2461 QuickCheckDetails* details, 2443 QuickCheckDetails* details,
2462 bool fall_through_on_failure) { 2444 bool fall_through_on_failure) {
2463 if (details->characters() == 0) return false; 2445 if (details->characters() == 0) return false;
2464 GetQuickCheckDetails( 2446 GetQuickCheckDetails(
2465 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); 2447 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
2466 if (details->cannot_match()) return false; 2448 if (details->cannot_match()) return false;
2467 if (!details->Rationalize(compiler->ascii())) return false; 2449 if (!details->Rationalize(compiler->one_byte())) return false;
2468 DCHECK(details->characters() == 1 || 2450 DCHECK(details->characters() == 1 ||
2469 compiler->macro_assembler()->CanReadUnaligned()); 2451 compiler->macro_assembler()->CanReadUnaligned());
2470 uint32_t mask = details->mask(); 2452 uint32_t mask = details->mask();
2471 uint32_t value = details->value(); 2453 uint32_t value = details->value();
2472 2454
2473 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2455 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2474 2456
2475 if (trace->characters_preloaded() != details->characters()) { 2457 if (trace->characters_preloaded() != details->characters()) {
2476 assembler->LoadCurrentCharacter(trace->cp_offset(), 2458 assembler->LoadCurrentCharacter(trace->cp_offset(),
2477 trace->backtrack(), 2459 trace->backtrack(),
2478 !preload_has_checked_bounds, 2460 !preload_has_checked_bounds,
2479 details->characters()); 2461 details->characters());
2480 } 2462 }
2481 2463
2482 2464
2483 bool need_mask = true; 2465 bool need_mask = true;
2484 2466
2485 if (details->characters() == 1) { 2467 if (details->characters() == 1) {
2486 // If number of characters preloaded is 1 then we used a byte or 16 bit 2468 // If number of characters preloaded is 1 then we used a byte or 16 bit
2487 // load so the value is already masked down. 2469 // load so the value is already masked down.
2488 uint32_t char_mask; 2470 uint32_t char_mask;
2489 if (compiler->ascii()) { 2471 if (compiler->one_byte()) {
2490 char_mask = String::kMaxOneByteCharCode; 2472 char_mask = String::kMaxOneByteCharCode;
2491 } else { 2473 } else {
2492 char_mask = String::kMaxUtf16CodeUnit; 2474 char_mask = String::kMaxUtf16CodeUnit;
2493 } 2475 }
2494 if ((mask & char_mask) == char_mask) need_mask = false; 2476 if ((mask & char_mask) == char_mask) need_mask = false;
2495 mask &= char_mask; 2477 mask &= char_mask;
2496 } else { 2478 } else {
2497 // For 2-character preloads in ASCII mode or 1-character preloads in 2479 // For 2-character preloads in one-byte mode or 1-character preloads in
2498 // TWO_BYTE mode we also use a 16 bit load with zero extend. 2480 // two-byte mode we also use a 16 bit load with zero extend.
2499 if (details->characters() == 2 && compiler->ascii()) { 2481 if (details->characters() == 2 && compiler->one_byte()) {
2500 if ((mask & 0xffff) == 0xffff) need_mask = false; 2482 if ((mask & 0xffff) == 0xffff) need_mask = false;
2501 } else if (details->characters() == 1 && !compiler->ascii()) { 2483 } else if (details->characters() == 1 && !compiler->one_byte()) {
2502 if ((mask & 0xffff) == 0xffff) need_mask = false; 2484 if ((mask & 0xffff) == 0xffff) need_mask = false;
2503 } else { 2485 } else {
2504 if (mask == 0xffffffff) need_mask = false; 2486 if (mask == 0xffffffff) need_mask = false;
2505 } 2487 }
2506 } 2488 }
2507 2489
2508 if (fall_through_on_failure) { 2490 if (fall_through_on_failure) {
2509 if (need_mask) { 2491 if (need_mask) {
2510 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); 2492 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
2511 } else { 2493 } else {
(...skipping 19 matching lines...) Expand all
2531 // machine word for the current character width in order to be used in 2513 // machine word for the current character width in order to be used in
2532 // generating a quick check. 2514 // generating a quick check.
2533 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 2515 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
2534 RegExpCompiler* compiler, 2516 RegExpCompiler* compiler,
2535 int characters_filled_in, 2517 int characters_filled_in,
2536 bool not_at_start) { 2518 bool not_at_start) {
2537 Isolate* isolate = compiler->macro_assembler()->zone()->isolate(); 2519 Isolate* isolate = compiler->macro_assembler()->zone()->isolate();
2538 DCHECK(characters_filled_in < details->characters()); 2520 DCHECK(characters_filled_in < details->characters());
2539 int characters = details->characters(); 2521 int characters = details->characters();
2540 int char_mask; 2522 int char_mask;
2541 if (compiler->ascii()) { 2523 if (compiler->one_byte()) {
2542 char_mask = String::kMaxOneByteCharCode; 2524 char_mask = String::kMaxOneByteCharCode;
2543 } else { 2525 } else {
2544 char_mask = String::kMaxUtf16CodeUnit; 2526 char_mask = String::kMaxUtf16CodeUnit;
2545 } 2527 }
2546 for (int k = 0; k < elms_->length(); k++) { 2528 for (int k = 0; k < elms_->length(); k++) {
2547 TextElement elm = elms_->at(k); 2529 TextElement elm = elms_->at(k);
2548 if (elm.text_type() == TextElement::ATOM) { 2530 if (elm.text_type() == TextElement::ATOM) {
2549 Vector<const uc16> quarks = elm.atom()->data(); 2531 Vector<const uc16> quarks = elm.atom()->data();
2550 for (int i = 0; i < characters && i < quarks.length(); i++) { 2532 for (int i = 0; i < characters && i < quarks.length(); i++) {
2551 QuickCheckDetails::Position* pos = 2533 QuickCheckDetails::Position* pos =
2552 details->positions(characters_filled_in); 2534 details->positions(characters_filled_in);
2553 uc16 c = quarks[i]; 2535 uc16 c = quarks[i];
2554 if (c > char_mask) { 2536 if (c > char_mask) {
2555 // If we expect a non-ASCII character from an ASCII string, 2537 // If we expect a non-Latin1 character from a one-byte string,
2556 // there is no way we can match. Not even case independent 2538 // there is no way we can match. Not even case-independent
2557 // matching can turn an ASCII character into non-ASCII or 2539 // matching can turn a Latin1 character into non-Latin1 or
2558 // vice versa. 2540 // vice versa.
2541 // TODO(dcarney): issue 3550. Verify that this works as expected.
2542 // For example, \u0178 is uppercase of \u00ff (y-umlaut).
Yang 2014/09/10 08:26:36 This is the other.
2559 details->set_cannot_match(); 2543 details->set_cannot_match();
2560 pos->determines_perfectly = false; 2544 pos->determines_perfectly = false;
2561 return; 2545 return;
2562 } 2546 }
2563 if (compiler->ignore_case()) { 2547 if (compiler->ignore_case()) {
2564 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 2548 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2565 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), 2549 int length = GetCaseIndependentLetters(isolate, c,
2566 chars); 2550 compiler->one_byte(), chars);
2567 DCHECK(length != 0); // Can only happen if c > char_mask (see above). 2551 DCHECK(length != 0); // Can only happen if c > char_mask (see above).
2568 if (length == 1) { 2552 if (length == 1) {
2569 // This letter has no case equivalents, so it's nice and simple 2553 // This letter has no case equivalents, so it's nice and simple
2570 // and the mask-compare will determine definitely whether we have 2554 // and the mask-compare will determine definitely whether we have
2571 // a match at this character position. 2555 // a match at this character position.
2572 pos->mask = char_mask; 2556 pos->mask = char_mask;
2573 pos->value = c; 2557 pos->value = c;
2574 pos->determines_perfectly = true; 2558 pos->determines_perfectly = true;
2575 } else { 2559 } else {
2576 uint32_t common_bits = char_mask; 2560 uint32_t common_bits = char_mask;
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
2685 void QuickCheckDetails::Clear() { 2669 void QuickCheckDetails::Clear() {
2686 for (int i = 0; i < characters_; i++) { 2670 for (int i = 0; i < characters_; i++) {
2687 positions_[i].mask = 0; 2671 positions_[i].mask = 0;
2688 positions_[i].value = 0; 2672 positions_[i].value = 0;
2689 positions_[i].determines_perfectly = false; 2673 positions_[i].determines_perfectly = false;
2690 } 2674 }
2691 characters_ = 0; 2675 characters_ = 0;
2692 } 2676 }
2693 2677
2694 2678
2695 void QuickCheckDetails::Advance(int by, bool ascii) { 2679 void QuickCheckDetails::Advance(int by, bool one_byte) {
2696 DCHECK(by >= 0); 2680 DCHECK(by >= 0);
2697 if (by >= characters_) { 2681 if (by >= characters_) {
2698 Clear(); 2682 Clear();
2699 return; 2683 return;
2700 } 2684 }
2701 for (int i = 0; i < characters_ - by; i++) { 2685 for (int i = 0; i < characters_ - by; i++) {
2702 positions_[i] = positions_[by + i]; 2686 positions_[i] = positions_[by + i];
2703 } 2687 }
2704 for (int i = characters_ - by; i < characters_; i++) { 2688 for (int i = characters_ - by; i < characters_; i++) {
2705 positions_[i].mask = 0; 2689 positions_[i].mask = 0;
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
2749 info->visited = true; 2733 info->visited = true;
2750 } 2734 }
2751 ~VisitMarker() { 2735 ~VisitMarker() {
2752 info_->visited = false; 2736 info_->visited = false;
2753 } 2737 }
2754 private: 2738 private:
2755 NodeInfo* info_; 2739 NodeInfo* info_;
2756 }; 2740 };
2757 2741
2758 2742
2759 RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) { 2743 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
2760 if (info()->replacement_calculated) return replacement(); 2744 if (info()->replacement_calculated) return replacement();
2761 if (depth < 0) return this; 2745 if (depth < 0) return this;
2762 DCHECK(!info()->visited); 2746 DCHECK(!info()->visited);
2763 VisitMarker marker(info()); 2747 VisitMarker marker(info());
2764 return FilterSuccessor(depth - 1, ignore_case); 2748 return FilterSuccessor(depth - 1, ignore_case);
2765 } 2749 }
2766 2750
2767 2751
2768 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { 2752 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
2769 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); 2753 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
2770 if (next == NULL) return set_replacement(NULL); 2754 if (next == NULL) return set_replacement(NULL);
2771 on_success_ = next; 2755 on_success_ = next;
2772 return set_replacement(this); 2756 return set_replacement(this);
2773 } 2757 }
2774 2758
2775 2759
2776 // We need to check for the following characters: 0x39c 0x3bc 0x178. 2760 // We need to check for the following characters: 0x39c 0x3bc 0x178.
2777 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { 2761 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
2778 // TODO(dcarney): this could be a lot more efficient. 2762 // TODO(dcarney): this could be a lot more efficient.
2779 return range.Contains(0x39c) || 2763 return range.Contains(0x39c) ||
2780 range.Contains(0x3bc) || range.Contains(0x178); 2764 range.Contains(0x3bc) || range.Contains(0x178);
2781 } 2765 }
2782 2766
2783 2767
2784 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { 2768 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
2785 for (int i = 0; i < ranges->length(); i++) { 2769 for (int i = 0; i < ranges->length(); i++) {
2786 // TODO(dcarney): this could be a lot more efficient. 2770 // TODO(dcarney): this could be a lot more efficient.
2787 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; 2771 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
2788 } 2772 }
2789 return false; 2773 return false;
2790 } 2774 }
2791 2775
2792 2776
2793 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { 2777 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
2794 if (info()->replacement_calculated) return replacement(); 2778 if (info()->replacement_calculated) return replacement();
2795 if (depth < 0) return this; 2779 if (depth < 0) return this;
2796 DCHECK(!info()->visited); 2780 DCHECK(!info()->visited);
2797 VisitMarker marker(info()); 2781 VisitMarker marker(info());
2798 int element_count = elms_->length(); 2782 int element_count = elms_->length();
2799 for (int i = 0; i < element_count; i++) { 2783 for (int i = 0; i < element_count; i++) {
2800 TextElement elm = elms_->at(i); 2784 TextElement elm = elms_->at(i);
2801 if (elm.text_type() == TextElement::ATOM) { 2785 if (elm.text_type() == TextElement::ATOM) {
2802 Vector<const uc16> quarks = elm.atom()->data(); 2786 Vector<const uc16> quarks = elm.atom()->data();
2803 for (int j = 0; j < quarks.length(); j++) { 2787 for (int j = 0; j < quarks.length(); j++) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
2837 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2821 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2838 return set_replacement(NULL); 2822 return set_replacement(NULL);
2839 } 2823 }
2840 } 2824 }
2841 } 2825 }
2842 } 2826 }
2843 return FilterSuccessor(depth - 1, ignore_case); 2827 return FilterSuccessor(depth - 1, ignore_case);
2844 } 2828 }
2845 2829
2846 2830
2847 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { 2831 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
2848 if (info()->replacement_calculated) return replacement(); 2832 if (info()->replacement_calculated) return replacement();
2849 if (depth < 0) return this; 2833 if (depth < 0) return this;
2850 if (info()->visited) return this; 2834 if (info()->visited) return this;
2851 { 2835 {
2852 VisitMarker marker(info()); 2836 VisitMarker marker(info());
2853 2837
2854 RegExpNode* continue_replacement = 2838 RegExpNode* continue_replacement =
2855 continue_node_->FilterASCII(depth - 1, ignore_case); 2839 continue_node_->FilterOneByte(depth - 1, ignore_case);
2856 // If we can't continue after the loop then there is no sense in doing the 2840 // If we can't continue after the loop then there is no sense in doing the
2857 // loop. 2841 // loop.
2858 if (continue_replacement == NULL) return set_replacement(NULL); 2842 if (continue_replacement == NULL) return set_replacement(NULL);
2859 } 2843 }
2860 2844
2861 return ChoiceNode::FilterASCII(depth - 1, ignore_case); 2845 return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
2862 } 2846 }
2863 2847
2864 2848
2865 RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) { 2849 RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
2866 if (info()->replacement_calculated) return replacement(); 2850 if (info()->replacement_calculated) return replacement();
2867 if (depth < 0) return this; 2851 if (depth < 0) return this;
2868 if (info()->visited) return this; 2852 if (info()->visited) return this;
2869 VisitMarker marker(info()); 2853 VisitMarker marker(info());
2870 int choice_count = alternatives_->length(); 2854 int choice_count = alternatives_->length();
2871 2855
2872 for (int i = 0; i < choice_count; i++) { 2856 for (int i = 0; i < choice_count; i++) {
2873 GuardedAlternative alternative = alternatives_->at(i); 2857 GuardedAlternative alternative = alternatives_->at(i);
2874 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { 2858 if (alternative.guards() != NULL && alternative.guards()->length() != 0) {
2875 set_replacement(this); 2859 set_replacement(this);
2876 return this; 2860 return this;
2877 } 2861 }
2878 } 2862 }
2879 2863
2880 int surviving = 0; 2864 int surviving = 0;
2881 RegExpNode* survivor = NULL; 2865 RegExpNode* survivor = NULL;
2882 for (int i = 0; i < choice_count; i++) { 2866 for (int i = 0; i < choice_count; i++) {
2883 GuardedAlternative alternative = alternatives_->at(i); 2867 GuardedAlternative alternative = alternatives_->at(i);
2884 RegExpNode* replacement = 2868 RegExpNode* replacement =
2885 alternative.node()->FilterASCII(depth - 1, ignore_case); 2869 alternative.node()->FilterOneByte(depth - 1, ignore_case);
2886 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. 2870 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
2887 if (replacement != NULL) { 2871 if (replacement != NULL) {
2888 alternatives_->at(i).set_node(replacement); 2872 alternatives_->at(i).set_node(replacement);
2889 surviving++; 2873 surviving++;
2890 survivor = replacement; 2874 survivor = replacement;
2891 } 2875 }
2892 } 2876 }
2893 if (surviving < 2) return set_replacement(survivor); 2877 if (surviving < 2) return set_replacement(survivor);
2894 2878
2895 set_replacement(this); 2879 set_replacement(this);
2896 if (surviving == choice_count) { 2880 if (surviving == choice_count) {
2897 return this; 2881 return this;
2898 } 2882 }
2899 // Only some of the nodes survived the filtering. We need to rebuild the 2883 // Only some of the nodes survived the filtering. We need to rebuild the
2900 // alternatives list. 2884 // alternatives list.
2901 ZoneList<GuardedAlternative>* new_alternatives = 2885 ZoneList<GuardedAlternative>* new_alternatives =
2902 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); 2886 new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
2903 for (int i = 0; i < choice_count; i++) { 2887 for (int i = 0; i < choice_count; i++) {
2904 RegExpNode* replacement = 2888 RegExpNode* replacement =
2905 alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case); 2889 alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
2906 if (replacement != NULL) { 2890 if (replacement != NULL) {
2907 alternatives_->at(i).set_node(replacement); 2891 alternatives_->at(i).set_node(replacement);
2908 new_alternatives->Add(alternatives_->at(i), zone()); 2892 new_alternatives->Add(alternatives_->at(i), zone());
2909 } 2893 }
2910 } 2894 }
2911 alternatives_ = new_alternatives; 2895 alternatives_ = new_alternatives;
2912 return this; 2896 return this;
2913 } 2897 }
2914 2898
2915 2899
2916 RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth, 2900 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,
2917 bool ignore_case) { 2901 bool ignore_case) {
2918 if (info()->replacement_calculated) return replacement(); 2902 if (info()->replacement_calculated) return replacement();
2919 if (depth < 0) return this; 2903 if (depth < 0) return this;
2920 if (info()->visited) return this; 2904 if (info()->visited) return this;
2921 VisitMarker marker(info()); 2905 VisitMarker marker(info());
2922 // Alternative 0 is the negative lookahead, alternative 1 is what comes 2906 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2923 // afterwards. 2907 // afterwards.
2924 RegExpNode* node = alternatives_->at(1).node(); 2908 RegExpNode* node = alternatives_->at(1).node();
2925 RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case); 2909 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
2926 if (replacement == NULL) return set_replacement(NULL); 2910 if (replacement == NULL) return set_replacement(NULL);
2927 alternatives_->at(1).set_node(replacement); 2911 alternatives_->at(1).set_node(replacement);
2928 2912
2929 RegExpNode* neg_node = alternatives_->at(0).node(); 2913 RegExpNode* neg_node = alternatives_->at(0).node();
2930 RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case); 2914 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
2931 // If the negative lookahead is always going to fail then 2915 // If the negative lookahead is always going to fail then
2932 // we don't need to check it. 2916 // we don't need to check it.
2933 if (neg_replacement == NULL) return set_replacement(replacement); 2917 if (neg_replacement == NULL) return set_replacement(replacement);
2934 alternatives_->at(0).set_node(neg_replacement); 2918 alternatives_->at(0).set_node(neg_replacement);
2935 return set_replacement(this); 2919 return set_replacement(this);
2936 } 2920 }
2937 2921
2938 2922
2939 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2923 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2940 RegExpCompiler* compiler, 2924 RegExpCompiler* compiler,
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
3029 assembler->CheckAtStart(&ok); 3013 assembler->CheckAtStart(&ok);
3030 } 3014 }
3031 // We already checked that we are not at the start of input so it must be 3015 // We already checked that we are not at the start of input so it must be
3032 // OK to load the previous character. 3016 // OK to load the previous character.
3033 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, 3017 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
3034 new_trace.backtrack(), 3018 new_trace.backtrack(),
3035 false); 3019 false);
3036 if (!assembler->CheckSpecialCharacterClass('n', 3020 if (!assembler->CheckSpecialCharacterClass('n',
3037 new_trace.backtrack())) { 3021 new_trace.backtrack())) {
3038 // Newline means \n, \r, 0x2028 or 0x2029. 3022 // Newline means \n, \r, 0x2028 or 0x2029.
3039 if (!compiler->ascii()) { 3023 if (!compiler->one_byte()) {
3040 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); 3024 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
3041 } 3025 }
3042 assembler->CheckCharacter('\n', &ok); 3026 assembler->CheckCharacter('\n', &ok);
3043 assembler->CheckNotCharacter('\r', new_trace.backtrack()); 3027 assembler->CheckNotCharacter('\r', new_trace.backtrack());
3044 } 3028 }
3045 assembler->Bind(&ok); 3029 assembler->Bind(&ok);
3046 on_success->Emit(compiler, &new_trace); 3030 on_success->Emit(compiler, &new_trace);
3047 } 3031 }
3048 3032
3049 3033
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
3227 // check can have involved a mask and compare operation which may simplify 3211 // check can have involved a mask and compare operation which may simplify
3228 // or obviate the need for further checks at some character positions. 3212 // or obviate the need for further checks at some character positions.
3229 void TextNode::TextEmitPass(RegExpCompiler* compiler, 3213 void TextNode::TextEmitPass(RegExpCompiler* compiler,
3230 TextEmitPassType pass, 3214 TextEmitPassType pass,
3231 bool preloaded, 3215 bool preloaded,
3232 Trace* trace, 3216 Trace* trace,
3233 bool first_element_checked, 3217 bool first_element_checked,
3234 int* checked_up_to) { 3218 int* checked_up_to) {
3235 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3219 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3236 Isolate* isolate = assembler->zone()->isolate(); 3220 Isolate* isolate = assembler->zone()->isolate();
3237 bool ascii = compiler->ascii(); 3221 bool one_byte = compiler->one_byte();
3238 Label* backtrack = trace->backtrack(); 3222 Label* backtrack = trace->backtrack();
3239 QuickCheckDetails* quick_check = trace->quick_check_performed(); 3223 QuickCheckDetails* quick_check = trace->quick_check_performed();
3240 int element_count = elms_->length(); 3224 int element_count = elms_->length();
3241 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 3225 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3242 TextElement elm = elms_->at(i); 3226 TextElement elm = elms_->at(i);
3243 int cp_offset = trace->cp_offset() + elm.cp_offset(); 3227 int cp_offset = trace->cp_offset() + elm.cp_offset();
3244 if (elm.text_type() == TextElement::ATOM) { 3228 if (elm.text_type() == TextElement::ATOM) {
3245 Vector<const uc16> quarks = elm.atom()->data(); 3229 Vector<const uc16> quarks = elm.atom()->data();
3246 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 3230 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3247 if (first_element_checked && i == 0 && j == 0) continue; 3231 if (first_element_checked && i == 0 && j == 0) continue;
3248 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; 3232 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
3249 EmitCharacterFunction* emit_function = NULL; 3233 EmitCharacterFunction* emit_function = NULL;
3250 switch (pass) { 3234 switch (pass) {
3251 case NON_ASCII_MATCH: 3235 case NON_LATIN1_MATCH:
3252 DCHECK(ascii); 3236 DCHECK(one_byte);
3253 if (quarks[j] > String::kMaxOneByteCharCode) { 3237 if (quarks[j] > String::kMaxOneByteCharCode) {
3254 assembler->GoTo(backtrack); 3238 assembler->GoTo(backtrack);
3255 return; 3239 return;
3256 } 3240 }
3257 break; 3241 break;
3258 case NON_LETTER_CHARACTER_MATCH: 3242 case NON_LETTER_CHARACTER_MATCH:
3259 emit_function = &EmitAtomNonLetter; 3243 emit_function = &EmitAtomNonLetter;
3260 break; 3244 break;
3261 case SIMPLE_CHARACTER_MATCH: 3245 case SIMPLE_CHARACTER_MATCH:
3262 emit_function = &EmitSimpleCharacter; 3246 emit_function = &EmitSimpleCharacter;
(...skipping 14 matching lines...) Expand all
3277 preloaded); 3261 preloaded);
3278 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3279 } 3263 }
3280 } 3264 }
3281 } else { 3265 } else {
3282 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); 3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
3283 if (pass == CHARACTER_CLASS_MATCH) { 3267 if (pass == CHARACTER_CLASS_MATCH) {
3284 if (first_element_checked && i == 0) continue; 3268 if (first_element_checked && i == 0) continue;
3285 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
3286 RegExpCharacterClass* cc = elm.char_class(); 3270 RegExpCharacterClass* cc = elm.char_class();
3287 EmitCharClass(assembler, 3271 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
3288 cc, 3272 *checked_up_to < cp_offset, preloaded, zone());
3289 ascii,
3290 backtrack,
3291 cp_offset,
3292 *checked_up_to < cp_offset,
3293 preloaded,
3294 zone());
3295 UpdateBoundsCheck(cp_offset, checked_up_to); 3273 UpdateBoundsCheck(cp_offset, checked_up_to);
3296 } 3274 }
3297 } 3275 }
3298 } 3276 }
3299 } 3277 }
3300 3278
3301 3279
3302 int TextNode::Length() { 3280 int TextNode::Length() {
3303 TextElement elm = elms_->last(); 3281 TextElement elm = elms_->last();
3304 DCHECK(elm.cp_offset() >= 0); 3282 DCHECK(elm.cp_offset() >= 0);
(...skipping 20 matching lines...) Expand all
3325 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3303 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3326 LimitResult limit_result = LimitVersions(compiler, trace); 3304 LimitResult limit_result = LimitVersions(compiler, trace);
3327 if (limit_result == DONE) return; 3305 if (limit_result == DONE) return;
3328 DCHECK(limit_result == CONTINUE); 3306 DCHECK(limit_result == CONTINUE);
3329 3307
3330 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { 3308 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
3331 compiler->SetRegExpTooBig(); 3309 compiler->SetRegExpTooBig();
3332 return; 3310 return;
3333 } 3311 }
3334 3312
3335 if (compiler->ascii()) { 3313 if (compiler->one_byte()) {
3336 int dummy = 0; 3314 int dummy = 0;
3337 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy); 3315 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
3338 } 3316 }
3339 3317
3340 bool first_elt_done = false; 3318 bool first_elt_done = false;
3341 int bound_checked_to = trace->cp_offset() - 1; 3319 int bound_checked_to = trace->cp_offset() - 1;
3342 bound_checked_to += trace->bound_checked_up_to(); 3320 bound_checked_to += trace->bound_checked_up_to();
3343 3321
3344 // If a character is preloaded into the current character register then 3322 // If a character is preloaded into the current character register then
3345 // check that now. 3323 // check that now.
3346 if (trace->characters_preloaded() == 1) { 3324 if (trace->characters_preloaded() == 1) {
3347 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { 3325 for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
3383 3361
3384 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { 3362 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
3385 DCHECK(by > 0); 3363 DCHECK(by > 0);
3386 // We don't have an instruction for shifting the current character register 3364 // We don't have an instruction for shifting the current character register
3387 // down or for using a shifted value for anything so lets just forget that 3365 // down or for using a shifted value for anything so lets just forget that
3388 // we preloaded any characters into it. 3366 // we preloaded any characters into it.
3389 characters_preloaded_ = 0; 3367 characters_preloaded_ = 0;
3390 // Adjust the offsets of the quick check performed information. This 3368 // Adjust the offsets of the quick check performed information. This
3391 // information is used to find out what we already determined about the 3369 // information is used to find out what we already determined about the
3392 // characters by means of mask and compare. 3370 // characters by means of mask and compare.
3393 quick_check_performed_.Advance(by, compiler->ascii()); 3371 quick_check_performed_.Advance(by, compiler->one_byte());
3394 cp_offset_ += by; 3372 cp_offset_ += by;
3395 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 3373 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
3396 compiler->SetRegExpTooBig(); 3374 compiler->SetRegExpTooBig();
3397 cp_offset_ = 0; 3375 cp_offset_ = 0;
3398 } 3376 }
3399 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); 3377 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
3400 } 3378 }
3401 3379
3402 3380
3403 void TextNode::MakeCaseIndependent(bool is_ascii) { 3381 void TextNode::MakeCaseIndependent(bool is_one_byte) {
3404 int element_count = elms_->length(); 3382 int element_count = elms_->length();
3405 for (int i = 0; i < element_count; i++) { 3383 for (int i = 0; i < element_count; i++) {
3406 TextElement elm = elms_->at(i); 3384 TextElement elm = elms_->at(i);
3407 if (elm.text_type() == TextElement::CHAR_CLASS) { 3385 if (elm.text_type() == TextElement::CHAR_CLASS) {
3408 RegExpCharacterClass* cc = elm.char_class(); 3386 RegExpCharacterClass* cc = elm.char_class();
3409 // None of the standard character classes is different in the case 3387 // None of the standard character classes is different in the case
3410 // independent case and it slows us down if we don't know that. 3388 // independent case and it slows us down if we don't know that.
3411 if (cc->is_standard(zone())) continue; 3389 if (cc->is_standard(zone())) continue;
3412 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 3390 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3413 int range_count = ranges->length(); 3391 int range_count = ranges->length();
3414 for (int j = 0; j < range_count; j++) { 3392 for (int j = 0; j < range_count; j++) {
3415 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); 3393 ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());
3416 } 3394 }
3417 } 3395 }
3418 } 3396 }
3419 } 3397 }
3420 3398
3421 3399
3422 int TextNode::GreedyLoopTextLength() { 3400 int TextNode::GreedyLoopTextLength() {
3423 TextElement elm = elms_->at(elms_->length() - 1); 3401 TextElement elm = elms_->at(elms_->length() - 1);
3424 return elm.cp_offset() + elm.length(); 3402 return elm.cp_offset() + elm.length();
3425 } 3403 }
3426 3404
3427 3405
3428 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 3406 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
3429 RegExpCompiler* compiler) { 3407 RegExpCompiler* compiler) {
3430 if (elms_->length() != 1) return NULL; 3408 if (elms_->length() != 1) return NULL;
3431 TextElement elm = elms_->at(0); 3409 TextElement elm = elms_->at(0);
3432 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 3410 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3433 RegExpCharacterClass* node = elm.char_class(); 3411 RegExpCharacterClass* node = elm.char_class();
3434 ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3412 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3435 if (!CharacterRange::IsCanonical(ranges)) { 3413 if (!CharacterRange::IsCanonical(ranges)) {
3436 CharacterRange::Canonicalize(ranges); 3414 CharacterRange::Canonicalize(ranges);
3437 } 3415 }
3438 if (node->is_negated()) { 3416 if (node->is_negated()) {
3439 return ranges->length() == 0 ? on_success() : NULL; 3417 return ranges->length() == 0 ? on_success() : NULL;
3440 } 3418 }
3441 if (ranges->length() != 1) return NULL; 3419 if (ranges->length() != 1) return NULL;
3442 uint32_t max_char; 3420 uint32_t max_char;
3443 if (compiler->ascii()) { 3421 if (compiler->one_byte()) {
3444 max_char = String::kMaxOneByteCharCode; 3422 max_char = String::kMaxOneByteCharCode;
3445 } else { 3423 } else {
3446 max_char = String::kMaxUtf16CodeUnit; 3424 max_char = String::kMaxUtf16CodeUnit;
3447 } 3425 }
3448 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; 3426 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
3449 } 3427 }
3450 3428
3451 3429
3452 // Finds the fixed match length of a sequence of nodes that goes from 3430 // Finds the fixed match length of a sequence of nodes that goes from
3453 // this alternative and back to this choice node. If there are variable 3431 // this alternative and back to this choice node. If there are variable
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
3510 return; 3488 return;
3511 } 3489 }
3512 ChoiceNode::Emit(compiler, trace); 3490 ChoiceNode::Emit(compiler, trace);
3513 } 3491 }
3514 3492
3515 3493
3516 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, 3494 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
3517 int eats_at_least) { 3495 int eats_at_least) {
3518 int preload_characters = Min(4, eats_at_least); 3496 int preload_characters = Min(4, eats_at_least);
3519 if (compiler->macro_assembler()->CanReadUnaligned()) { 3497 if (compiler->macro_assembler()->CanReadUnaligned()) {
3520 bool ascii = compiler->ascii(); 3498 bool one_byte = compiler->one_byte();
3521 if (ascii) { 3499 if (one_byte) {
3522 if (preload_characters > 4) preload_characters = 4; 3500 if (preload_characters > 4) preload_characters = 4;
3523 // We can't preload 3 characters because there is no machine instruction 3501 // We can't preload 3 characters because there is no machine instruction
3524 // to do that. We can't just load 4 because we could be reading 3502 // to do that. We can't just load 4 because we could be reading
3525 // beyond the end of the string, which could cause a memory fault. 3503 // beyond the end of the string, which could cause a memory fault.
3526 if (preload_characters == 3) preload_characters = 2; 3504 if (preload_characters == 3) preload_characters = 2;
3527 } else { 3505 } else {
3528 if (preload_characters > 2) preload_characters = 2; 3506 if (preload_characters > 2) preload_characters = 2;
3529 } 3507 }
3530 } else { 3508 } else {
3531 if (preload_characters > 1) preload_characters = 1; 3509 if (preload_characters > 1) preload_characters = 1;
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
3637 map_count_ = kMapSize; 3615 map_count_ = kMapSize;
3638 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; 3616 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
3639 } 3617 }
3640 } 3618 }
3641 3619
3642 3620
3643 BoyerMooreLookahead::BoyerMooreLookahead( 3621 BoyerMooreLookahead::BoyerMooreLookahead(
3644 int length, RegExpCompiler* compiler, Zone* zone) 3622 int length, RegExpCompiler* compiler, Zone* zone)
3645 : length_(length), 3623 : length_(length),
3646 compiler_(compiler) { 3624 compiler_(compiler) {
3647 if (compiler->ascii()) { 3625 if (compiler->one_byte()) {
3648 max_char_ = String::kMaxOneByteCharCode; 3626 max_char_ = String::kMaxOneByteCharCode;
3649 } else { 3627 } else {
3650 max_char_ = String::kMaxUtf16CodeUnit; 3628 max_char_ = String::kMaxUtf16CodeUnit;
3651 } 3629 }
3652 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); 3630 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
3653 for (int i = 0; i < length; i++) { 3631 for (int i = 0; i < length; i++) {
3654 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); 3632 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);
3655 } 3633 }
3656 } 3634 }
3657 3635
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
3705 // can theoretically be up to 2*kSize though we treat it mostly as 3683 // can theoretically be up to 2*kSize though we treat it mostly as
3706 // a fraction of kSize. 3684 // a fraction of kSize.
3707 frequency += compiler_->frequency_collator()->Frequency(j) + 1; 3685 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
3708 } 3686 }
3709 } 3687 }
3710 // We use the probability of skipping times the distance we are skipping to 3688 // We use the probability of skipping times the distance we are skipping to
3711 // judge the effectiveness of this. Actually we have a cut-off: By 3689 // judge the effectiveness of this. Actually we have a cut-off: By
3712 // dividing by 2 we switch off the skipping if the probability of skipping 3690 // dividing by 2 we switch off the skipping if the probability of skipping
3713 // is less than 50%. This is because the multibyte mask-and-compare 3691 // is less than 50%. This is because the multibyte mask-and-compare
3714 // skipping in quickcheck is more likely to do well on this case. 3692 // skipping in quickcheck is more likely to do well on this case.
3715 bool in_quickcheck_range = ((i - remembered_from < 4) || 3693 bool in_quickcheck_range =
3716 (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2)); 3694 ((i - remembered_from < 4) ||
3695 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
3717 // Called 'probability' but it is only a rough estimate and can actually 3696 // Called 'probability' but it is only a rough estimate and can actually
3718 // be outside the 0-kSize range. 3697 // be outside the 0-kSize range.
3719 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency; 3698 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
3720 int points = (i - remembered_from) * probability; 3699 int points = (i - remembered_from) * probability;
3721 if (points > biggest_points) { 3700 if (points > biggest_points) {
3722 *from = remembered_from; 3701 *from = remembered_from;
3723 *to = i - 1; 3702 *to = i - 1;
3724 biggest_points = points; 3703 biggest_points = points;
3725 } 3704 }
3726 } 3705 }
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
3924 #endif 3903 #endif
3925 } 3904 }
3926 3905
3927 3906
3928 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, 3907 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
3929 Trace* current_trace, 3908 Trace* current_trace,
3930 PreloadState* state) { 3909 PreloadState* state) {
3931 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { 3910 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3932 // Save some time by looking at most one machine word ahead. 3911 // Save some time by looking at most one machine word ahead.
3933 state->eats_at_least_ = 3912 state->eats_at_least_ =
3934 EatsAtLeast(compiler->ascii() ? 4 : 2, 3913 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
3935 kRecursionBudget,
3936 current_trace->at_start() == Trace::FALSE_VALUE); 3914 current_trace->at_start() == Trace::FALSE_VALUE);
3937 } 3915 }
3938 state->preload_characters_ = 3916 state->preload_characters_ =
3939 CalculatePreloadCharacters(compiler, state->eats_at_least_); 3917 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3940 3918
3941 state->preload_is_current_ = 3919 state->preload_is_current_ =
3942 (current_trace->characters_preloaded() == state->preload_characters_); 3920 (current_trace->characters_preloaded() == state->preload_characters_);
3943 state->preload_has_checked_bounds_ = state->preload_is_current_; 3921 state->preload_has_checked_bounds_ = state->preload_is_current_;
3944 } 3922 }
3945 3923
(...skipping 1394 matching lines...) Expand 10 before | Expand all | Expand 10 after
5340 for (int i = 0; i < overlay.length(); i += 2) { 5318 for (int i = 0; i < overlay.length(); i += 2) {
5341 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1), 5319 table.AddRange(CharacterRange(overlay[i], overlay[i + 1] - 1),
5342 CharacterRangeSplitter::kInOverlay, zone); 5320 CharacterRangeSplitter::kInOverlay, zone);
5343 } 5321 }
5344 CharacterRangeSplitter callback(included, excluded, zone); 5322 CharacterRangeSplitter callback(included, excluded, zone);
5345 table.ForEach(&callback); 5323 table.ForEach(&callback);
5346 } 5324 }
5347 5325
5348 5326
5349 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, 5327 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
5350 bool is_ascii, 5328 bool is_one_byte, Zone* zone) {
5351 Zone* zone) {
5352 Isolate* isolate = zone->isolate(); 5329 Isolate* isolate = zone->isolate();
5353 uc16 bottom = from(); 5330 uc16 bottom = from();
5354 uc16 top = to(); 5331 uc16 top = to();
5355 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) { 5332 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
5356 if (bottom > String::kMaxOneByteCharCode) return; 5333 if (bottom > String::kMaxOneByteCharCode) return;
5357 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; 5334 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5358 } 5335 }
5359 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5336 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5360 if (top == bottom) { 5337 if (top == bottom) {
5361 // If this is a singleton we just expand the one character. 5338 // If this is a singleton we just expand the one character.
5362 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); 5339 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5363 for (int i = 0; i < length; i++) { 5340 for (int i = 0; i < length; i++) {
5364 uc32 chr = chars[i]; 5341 uc32 chr = chars[i];
5365 if (chr != bottom) { 5342 if (chr != bottom) {
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after
5764 for (int i = 0; i < element_count; i++) { 5741 for (int i = 0; i < element_count; i++) {
5765 TextElement& elm = elements()->at(i); 5742 TextElement& elm = elements()->at(i);
5766 elm.set_cp_offset(cp_offset); 5743 elm.set_cp_offset(cp_offset);
5767 cp_offset += elm.length(); 5744 cp_offset += elm.length();
5768 } 5745 }
5769 } 5746 }
5770 5747
5771 5748
5772 void Analysis::VisitText(TextNode* that) { 5749 void Analysis::VisitText(TextNode* that) {
5773 if (ignore_case_) { 5750 if (ignore_case_) {
5774 that->MakeCaseIndependent(is_ascii_); 5751 that->MakeCaseIndependent(is_one_byte_);
5775 } 5752 }
5776 EnsureAnalyzed(that->on_success()); 5753 EnsureAnalyzed(that->on_success());
5777 if (!has_failed()) { 5754 if (!has_failed()) {
5778 that->CalculateOffsets(); 5755 that->CalculateOffsets();
5779 } 5756 }
5780 } 5757 }
5781 5758
5782 5759
5783 void Analysis::VisitAction(ActionNode* that) { 5760 void Analysis::VisitAction(ActionNode* that) {
5784 RegExpNode* target = that->on_success(); 5761 RegExpNode* target = that->on_success();
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
6040 } 6017 }
6041 6018
6042 6019
6043 void DispatchTableConstructor::VisitAction(ActionNode* that) { 6020 void DispatchTableConstructor::VisitAction(ActionNode* that) {
6044 RegExpNode* target = that->on_success(); 6021 RegExpNode* target = that->on_success();
6045 target->Accept(this); 6022 target->Accept(this);
6046 } 6023 }
6047 6024
6048 6025
6049 RegExpEngine::CompilationResult RegExpEngine::Compile( 6026 RegExpEngine::CompilationResult RegExpEngine::Compile(
6050 RegExpCompileData* data, 6027 RegExpCompileData* data, bool ignore_case, bool is_global,
6051 bool ignore_case, 6028 bool is_multiline, Handle<String> pattern, Handle<String> sample_subject,
6052 bool is_global, 6029 bool is_one_byte, Zone* zone) {
6053 bool is_multiline,
6054 Handle<String> pattern,
6055 Handle<String> sample_subject,
6056 bool is_ascii,
6057 Zone* zone) {
6058 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 6030 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6059 return IrregexpRegExpTooBig(zone->isolate()); 6031 return IrregexpRegExpTooBig(zone->isolate());
6060 } 6032 }
6061 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone); 6033 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
6062 6034
6063 // Sample some characters from the middle of the string. 6035 // Sample some characters from the middle of the string.
6064 static const int kSampleSize = 128; 6036 static const int kSampleSize = 128;
6065 6037
6066 sample_subject = String::Flatten(sample_subject); 6038 sample_subject = String::Flatten(sample_subject);
6067 int chars_sampled = 0; 6039 int chars_sampled = 0;
6068 int half_way = (sample_subject->length() - kSampleSize) / 2; 6040 int half_way = (sample_subject->length() - kSampleSize) / 2;
6069 for (int i = Max(0, half_way); 6041 for (int i = Max(0, half_way);
6070 i < sample_subject->length() && chars_sampled < kSampleSize; 6042 i < sample_subject->length() && chars_sampled < kSampleSize;
6071 i++, chars_sampled++) { 6043 i++, chars_sampled++) {
(...skipping 26 matching lines...) Expand all
6098 // at the start of input. 6070 // at the start of input.
6099 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); 6071 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
6100 first_step_node->AddAlternative(GuardedAlternative(captured_body)); 6072 first_step_node->AddAlternative(GuardedAlternative(captured_body));
6101 first_step_node->AddAlternative(GuardedAlternative( 6073 first_step_node->AddAlternative(GuardedAlternative(
6102 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); 6074 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
6103 node = first_step_node; 6075 node = first_step_node;
6104 } else { 6076 } else {
6105 node = loop_node; 6077 node = loop_node;
6106 } 6078 }
6107 } 6079 }
6108 if (is_ascii) { 6080 if (is_one_byte) {
6109 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case); 6081 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
6110 // Do it again to propagate the new nodes to places where they were not 6082 // Do it again to propagate the new nodes to places where they were not
6111 // put because they had not been calculated yet. 6083 // put because they had not been calculated yet.
6112 if (node != NULL) { 6084 if (node != NULL) {
6113 node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case); 6085 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
6114 } 6086 }
6115 } 6087 }
6116 6088
6117 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); 6089 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
6118 data->node = node; 6090 data->node = node;
6119 Analysis analysis(ignore_case, is_ascii); 6091 Analysis analysis(ignore_case, is_one_byte);
6120 analysis.EnsureAnalyzed(node); 6092 analysis.EnsureAnalyzed(node);
6121 if (analysis.has_failed()) { 6093 if (analysis.has_failed()) {
6122 const char* error_message = analysis.error_message(); 6094 const char* error_message = analysis.error_message();
6123 return CompilationResult(zone->isolate(), error_message); 6095 return CompilationResult(zone->isolate(), error_message);
6124 } 6096 }
6125 6097
6126 // Create the correct assembler for the architecture. 6098 // Create the correct assembler for the architecture.
6127 #ifndef V8_INTERPRETED_REGEXP 6099 #ifndef V8_INTERPRETED_REGEXP
6128 // Native regexp implementation. 6100 // Native regexp implementation.
6129 6101
6130 NativeRegExpMacroAssembler::Mode mode = 6102 NativeRegExpMacroAssembler::Mode mode =
6131 is_ascii ? NativeRegExpMacroAssembler::ASCII 6103 is_one_byte ? NativeRegExpMacroAssembler::LATIN1
6132 : NativeRegExpMacroAssembler::UC16; 6104 : NativeRegExpMacroAssembler::UC16;
6133 6105
6134 #if V8_TARGET_ARCH_IA32 6106 #if V8_TARGET_ARCH_IA32
6135 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2, 6107 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,
6136 zone); 6108 zone);
6137 #elif V8_TARGET_ARCH_X64 6109 #elif V8_TARGET_ARCH_X64
6138 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2, 6110 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2,
6139 zone); 6111 zone);
6140 #elif V8_TARGET_ARCH_ARM 6112 #elif V8_TARGET_ARCH_ARM
6141 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2, 6113 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2,
6142 zone); 6114 zone);
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
6179 } 6151 }
6180 6152
6181 return compiler.Assemble(&macro_assembler, 6153 return compiler.Assemble(&macro_assembler,
6182 node, 6154 node,
6183 data->capture_count, 6155 data->capture_count,
6184 pattern); 6156 pattern);
6185 } 6157 }
6186 6158
6187 6159
6188 }} // namespace v8::internal 6160 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698