Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: src/ia32/regexp-macro-assembler-ia32.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)
Patch Set: Addressed review comments. Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.h ('k') | src/jsregexp.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2008-2009 the V8 project authors. All rights reserved. 1 // Copyright 2008-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 453 matching lines...) Expand 10 before | Expand all | Expand 10 after
464 Label* on_not_equal) { 464 Label* on_not_equal) {
465 ASSERT(minus < String::kMaxUC16CharCode); 465 ASSERT(minus < String::kMaxUC16CharCode);
466 __ lea(eax, Operand(current_character(), -minus)); 466 __ lea(eax, Operand(current_character(), -minus));
467 __ and_(eax, mask); 467 __ and_(eax, mask);
468 __ cmp(eax, c); 468 __ cmp(eax, c);
469 BranchOrBacktrack(not_equal, on_not_equal); 469 BranchOrBacktrack(not_equal, on_not_equal);
470 } 470 }
471 471
472 472
473 bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, 473 bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
474 int cp_offset,
475 bool check_offset,
476 Label* on_no_match) { 474 Label* on_no_match) {
477 // Range checks (c in min..max) are generally implemented by an unsigned 475 // Range checks (c in min..max) are generally implemented by an unsigned
478 // (c - min) <= (max - min) check 476 // (c - min) <= (max - min) check
479 switch (type) { 477 switch (type) {
480 case 's': 478 case 's':
481 // Match space-characters 479 // Match space-characters
482 if (mode_ == ASCII) { 480 if (mode_ == ASCII) {
483 // ASCII space characters are '\t'..'\r' and ' '. 481 // ASCII space characters are '\t'..'\r' and ' '.
484 if (check_offset) {
485 LoadCurrentCharacter(cp_offset, on_no_match);
486 } else {
487 LoadCurrentCharacterUnchecked(cp_offset, 1);
488 }
489 Label success; 482 Label success;
490 __ cmp(current_character(), ' '); 483 __ cmp(current_character(), ' ');
491 __ j(equal, &success); 484 __ j(equal, &success);
492 // Check range 0x09..0x0d 485 // Check range 0x09..0x0d
493 __ sub(Operand(current_character()), Immediate('\t')); 486 __ lea(eax, Operand(current_character(), -'\t'));
494 __ cmp(current_character(), '\r' - '\t'); 487 __ cmp(eax, '\r' - '\t');
495 BranchOrBacktrack(above, on_no_match); 488 BranchOrBacktrack(above, on_no_match);
496 __ bind(&success); 489 __ bind(&success);
497 return true; 490 return true;
498 } 491 }
499 return false; 492 return false;
500 case 'S': 493 case 'S':
501 // Match non-space characters. 494 // Match non-space characters.
502 if (check_offset) {
503 LoadCurrentCharacter(cp_offset, on_no_match, 1);
504 } else {
505 LoadCurrentCharacterUnchecked(cp_offset, 1);
506 }
507 if (mode_ == ASCII) { 495 if (mode_ == ASCII) {
508 // ASCII space characters are '\t'..'\r' and ' '. 496 // ASCII space characters are '\t'..'\r' and ' '.
509 __ cmp(current_character(), ' '); 497 __ cmp(current_character(), ' ');
510 BranchOrBacktrack(equal, on_no_match); 498 BranchOrBacktrack(equal, on_no_match);
511 __ sub(Operand(current_character()), Immediate('\t')); 499 __ lea(eax, Operand(current_character(), -'\t'));
512 __ cmp(current_character(), '\r' - '\t'); 500 __ cmp(eax, '\r' - '\t');
513 BranchOrBacktrack(below_equal, on_no_match); 501 BranchOrBacktrack(below_equal, on_no_match);
514 return true; 502 return true;
515 } 503 }
516 return false; 504 return false;
517 case 'd': 505 case 'd':
518 // Match ASCII digits ('0'..'9') 506 // Match ASCII digits ('0'..'9')
519 if (check_offset) { 507 __ lea(eax, Operand(current_character(), -'0'));
520 LoadCurrentCharacter(cp_offset, on_no_match, 1); 508 __ cmp(eax, '9' - '0');
521 } else {
522 LoadCurrentCharacterUnchecked(cp_offset, 1);
523 }
524 __ sub(Operand(current_character()), Immediate('0'));
525 __ cmp(current_character(), '9' - '0');
526 BranchOrBacktrack(above, on_no_match); 509 BranchOrBacktrack(above, on_no_match);
527 return true; 510 return true;
528 case 'D': 511 case 'D':
529 // Match non ASCII-digits 512 // Match non ASCII-digits
530 if (check_offset) { 513 __ lea(eax, Operand(current_character(), -'0'));
531 LoadCurrentCharacter(cp_offset, on_no_match, 1); 514 __ cmp(eax, '9' - '0');
532 } else {
533 LoadCurrentCharacterUnchecked(cp_offset, 1);
534 }
535 __ sub(Operand(current_character()), Immediate('0'));
536 __ cmp(current_character(), '9' - '0');
537 BranchOrBacktrack(below_equal, on_no_match); 515 BranchOrBacktrack(below_equal, on_no_match);
538 return true; 516 return true;
539 case '.': { 517 case '.': {
540 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 518 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
541 if (check_offset) { 519 __ mov(Operand(eax), current_character());
542 LoadCurrentCharacter(cp_offset, on_no_match, 1); 520 __ xor_(Operand(eax), Immediate(0x01));
543 } else {
544 LoadCurrentCharacterUnchecked(cp_offset, 1);
545 }
546 __ xor_(Operand(current_character()), Immediate(0x01));
547 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 521 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
548 __ sub(Operand(current_character()), Immediate(0x0b)); 522 __ sub(Operand(eax), Immediate(0x0b));
549 __ cmp(current_character(), 0x0c - 0x0b); 523 __ cmp(eax, 0x0c - 0x0b);
550 BranchOrBacktrack(below_equal, on_no_match); 524 BranchOrBacktrack(below_equal, on_no_match);
551 if (mode_ == UC16) { 525 if (mode_ == UC16) {
552 // Compare original value to 0x2028 and 0x2029, using the already 526 // Compare original value to 0x2028 and 0x2029, using the already
553 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for 527 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for
554 // 0x201d (0x2028 - 0x0b) or 0x201e. 528 // 0x201d (0x2028 - 0x0b) or 0x201e.
555 __ sub(Operand(current_character()), Immediate(0x2028 - 0x0b)); 529 __ sub(Operand(eax), Immediate(0x2028 - 0x0b));
556 __ cmp(current_character(), 1); 530 __ cmp(eax, 0x2029 - 0x2028);
557 BranchOrBacktrack(below_equal, on_no_match); 531 BranchOrBacktrack(below_equal, on_no_match);
558 } 532 }
559 return true; 533 return true;
560 } 534 }
535 case 'w': {
536 Label done, check_digits;
537 __ cmp(Operand(current_character()), Immediate('9'));
538 __ j(less_equal, &check_digits);
539 __ cmp(Operand(current_character()), Immediate('_'));
540 __ j(equal, &done);
541 // Convert to lower case if letter.
542 __ mov(Operand(eax), current_character());
543 __ or_(eax, 0x20);
544 // Check current character in range ['a'..'z'], nondestructively.
545 __ sub(Operand(eax), Immediate('a'));
546 __ cmp(Operand(eax), Immediate('z' - 'a'));
547 BranchOrBacktrack(above, on_no_match);
548 __ jmp(&done);
549 __ bind(&check_digits);
550 // Check current character in range ['0'..'9'].
551 __ cmp(Operand(current_character()), Immediate('0'));
552 BranchOrBacktrack(below, on_no_match);
553 __ bind(&done);
554
555 return true;
556 }
557 case 'W': {
558 Label done, check_digits;
559 __ cmp(Operand(current_character()), Immediate('9'));
560 __ j(less_equal, &check_digits);
561 __ cmp(Operand(current_character()), Immediate('_'));
562 BranchOrBacktrack(equal, on_no_match);
563 // Convert to lower case if letter.
564 __ mov(Operand(eax), current_character());
565 __ or_(eax, 0x20);
566 // Check current character in range ['a'..'z'], nondestructively.
567 __ sub(Operand(eax), Immediate('a'));
568 __ cmp(Operand(eax), Immediate('z' - 'a'));
569 BranchOrBacktrack(below_equal, on_no_match);
570 __ jmp(&done);
571 __ bind(&check_digits);
572 // Check current character in range ['0'..'9'].
573 __ cmp(Operand(current_character()), Immediate('0'));
574 BranchOrBacktrack(above_equal, on_no_match);
575 __ bind(&done);
576 return true;
577 }
578 // Non-standard classes (with no syntactic shorthand) used internally.
561 case '*': 579 case '*':
562 // Match any character. 580 // Match any character.
563 if (check_offset) { 581 return true;
564 CheckPosition(cp_offset, on_no_match); 582 case 'n': {
583 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
584 // The opposite of '.'.
585 __ mov(Operand(eax), current_character());
586 __ xor_(Operand(eax), Immediate(0x01));
587 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
588 __ sub(Operand(eax), Immediate(0x0b));
589 __ cmp(eax, 0x0c - 0x0b);
590 if (mode_ == ASCII) {
591 BranchOrBacktrack(above, on_no_match);
592 } else {
593 Label done;
594 BranchOrBacktrack(below_equal, &done);
595 ASSERT_EQ(UC16, mode_);
596 // Compare original value to 0x2028 and 0x2029, using the already
597 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for
598 // 0x201d (0x2028 - 0x0b) or 0x201e.
599 __ sub(Operand(eax), Immediate(0x2028 - 0x0b));
600 __ cmp(eax, 1);
601 BranchOrBacktrack(above, on_no_match);
602 __ bind(&done);
565 } 603 }
566 return true; 604 return true;
567 // No custom implementation (yet): w, W, s(UC16), S(UC16). 605 }
606 // No custom implementation (yet): s(UC16), S(UC16).
568 default: 607 default:
569 return false; 608 return false;
570 } 609 }
571 } 610 }
572 611
573 612
574 void RegExpMacroAssemblerIA32::Fail() { 613 void RegExpMacroAssemblerIA32::Fail() {
575 ASSERT(FAILURE == 0); // Return value for failure is zero. 614 ASSERT(FAILURE == 0); // Return value for failure is zero.
576 __ xor_(eax, Operand(eax)); // zero eax. 615 __ xor_(eax, Operand(eax)); // zero eax.
577 __ jmp(&exit_label_); 616 __ jmp(&exit_label_);
(...skipping 581 matching lines...) Expand 10 before | Expand all | Expand 10 after
1159 } 1198 }
1160 } 1199 }
1161 } 1200 }
1162 1201
1163 1202
1164 #undef __ 1203 #undef __
1165 1204
1166 #endif // V8_NATIVE_REGEXP 1205 #endif // V8_NATIVE_REGEXP
1167 1206
1168 }} // namespace v8::internal 1207 }} // namespace v8::internal
OLD NEW
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.h ('k') | src/jsregexp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698