Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(48)

Side by Side Diff: src/arm/regexp-macro-assembler-arm.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)
Patch Set: Addressed review comments. Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/regexp-macro-assembler-arm.h ('k') | src/ast.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2009 the V8 project authors. All rights reserved. 1 // Copyright 2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 441 matching lines...) Expand 10 before | Expand all | Expand 10 after
452 Label* on_not_equal) { 452 Label* on_not_equal) {
453 ASSERT(minus < String::kMaxUC16CharCode); 453 ASSERT(minus < String::kMaxUC16CharCode);
454 __ sub(r0, current_character(), Operand(minus)); 454 __ sub(r0, current_character(), Operand(minus));
455 __ and_(r0, r0, Operand(mask)); 455 __ and_(r0, r0, Operand(mask));
456 __ cmp(r0, Operand(c)); 456 __ cmp(r0, Operand(c));
457 BranchOrBacktrack(ne, on_not_equal); 457 BranchOrBacktrack(ne, on_not_equal);
458 } 458 }
459 459
460 460
461 bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type, 461 bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
462 int cp_offset,
463 bool check_offset,
464 Label* on_no_match) { 462 Label* on_no_match) {
465 // Range checks (c in min..max) are generally implemented by an unsigned 463 // Range checks (c in min..max) are generally implemented by an unsigned
466 // (c - min) <= (max - min) check 464 // (c - min) <= (max - min) check
467 switch (type) { 465 switch (type) {
468 case 's': 466 case 's':
469 // Match space-characters 467 // Match space-characters
470 if (mode_ == ASCII) { 468 if (mode_ == ASCII) {
471 // ASCII space characters are '\t'..'\r' and ' '. 469 // ASCII space characters are '\t'..'\r' and ' '.
472 if (check_offset) {
473 LoadCurrentCharacter(cp_offset, on_no_match);
474 } else {
475 LoadCurrentCharacterUnchecked(cp_offset, 1);
476 }
477 Label success; 470 Label success;
478 __ cmp(current_character(), Operand(' ')); 471 __ cmp(current_character(), Operand(' '));
479 __ b(eq, &success); 472 __ b(eq, &success);
480 // Check range 0x09..0x0d 473 // Check range 0x09..0x0d
481 __ sub(r0, current_character(), Operand('\t')); 474 __ sub(r0, current_character(), Operand('\t'));
482 __ cmp(r0, Operand('\r' - '\t')); 475 __ cmp(r0, Operand('\r' - '\t'));
483 BranchOrBacktrack(hi, on_no_match); 476 BranchOrBacktrack(hi, on_no_match);
484 __ bind(&success); 477 __ bind(&success);
485 return true; 478 return true;
486 } 479 }
487 return false; 480 return false;
488 case 'S': 481 case 'S':
489 // Match non-space characters. 482 // Match non-space characters.
490 if (check_offset) {
491 LoadCurrentCharacter(cp_offset, on_no_match, 1);
492 } else {
493 LoadCurrentCharacterUnchecked(cp_offset, 1);
494 }
495 if (mode_ == ASCII) { 483 if (mode_ == ASCII) {
496 // ASCII space characters are '\t'..'\r' and ' '. 484 // ASCII space characters are '\t'..'\r' and ' '.
497 __ cmp(current_character(), Operand(' ')); 485 __ cmp(current_character(), Operand(' '));
498 BranchOrBacktrack(eq, on_no_match); 486 BranchOrBacktrack(eq, on_no_match);
499 __ sub(r0, current_character(), Operand('\t')); 487 __ sub(r0, current_character(), Operand('\t'));
500 __ cmp(r0, Operand('\r' - '\t')); 488 __ cmp(r0, Operand('\r' - '\t'));
501 BranchOrBacktrack(ls, on_no_match); 489 BranchOrBacktrack(ls, on_no_match);
502 return true; 490 return true;
503 } 491 }
504 return false; 492 return false;
505 case 'd': 493 case 'd':
506 // Match ASCII digits ('0'..'9') 494 // Match ASCII digits ('0'..'9')
507 if (check_offset) {
508 LoadCurrentCharacter(cp_offset, on_no_match, 1);
509 } else {
510 LoadCurrentCharacterUnchecked(cp_offset, 1);
511 }
512 __ sub(r0, current_character(), Operand('0')); 495 __ sub(r0, current_character(), Operand('0'));
513 __ cmp(r0, Operand('9' - '0')); 496 __ cmp(r0, Operand('9' - '0'));
514 BranchOrBacktrack(hi, on_no_match); 497 BranchOrBacktrack(hi, on_no_match);
515 return true; 498 return true;
516 case 'D': 499 case 'D':
517 // Match non ASCII-digits 500 // Match non ASCII-digits
518 if (check_offset) {
519 LoadCurrentCharacter(cp_offset, on_no_match, 1);
520 } else {
521 LoadCurrentCharacterUnchecked(cp_offset, 1);
522 }
523 __ sub(r0, current_character(), Operand('0')); 501 __ sub(r0, current_character(), Operand('0'));
524 __ cmp(r0, Operand('9' - '0')); 502 __ cmp(r0, Operand('9' - '0'));
525 BranchOrBacktrack(ls, on_no_match); 503 BranchOrBacktrack(ls, on_no_match);
526 return true; 504 return true;
527 case '.': { 505 case '.': {
528 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 506 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
529 if (check_offset) {
530 LoadCurrentCharacter(cp_offset, on_no_match, 1);
531 } else {
532 LoadCurrentCharacterUnchecked(cp_offset, 1);
533 }
534 __ eor(r0, current_character(), Operand(0x01)); 507 __ eor(r0, current_character(), Operand(0x01));
535 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 508 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
536 __ sub(r0, r0, Operand(0x0b)); 509 __ sub(r0, r0, Operand(0x0b));
537 __ cmp(r0, Operand(0x0c - 0x0b)); 510 __ cmp(r0, Operand(0x0c - 0x0b));
538 BranchOrBacktrack(ls, on_no_match); 511 BranchOrBacktrack(ls, on_no_match);
539 if (mode_ == UC16) { 512 if (mode_ == UC16) {
540 // Compare original value to 0x2028 and 0x2029, using the already 513 // Compare original value to 0x2028 and 0x2029, using the already
541 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for 514 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for
542 // 0x201d (0x2028 - 0x0b) or 0x201e. 515 // 0x201d (0x2028 - 0x0b) or 0x201e.
543 __ sub(r0, r0, Operand(0x2028 - 0x0b)); 516 __ sub(r0, r0, Operand(0x2028 - 0x0b));
544 __ cmp(r0, Operand(1)); 517 __ cmp(r0, Operand(1));
545 BranchOrBacktrack(ls, on_no_match); 518 BranchOrBacktrack(ls, on_no_match);
546 } 519 }
547 return true; 520 return true;
548 } 521 }
522 case 'n': {
523 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
524 __ eor(r0, current_character(), Operand(0x01));
525 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
526 __ sub(r0, r0, Operand(0x0b));
527 __ cmp(r0, Operand(0x0c - 0x0b));
528 if (mode_ == ASCII) {
529 BranchOrBacktrack(hi, on_no_match);
530 } else {
531 Label done;
532 __ b(ls, &done);
533 // Compare original value to 0x2028 and 0x2029, using the already
534 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for
535 // 0x201d (0x2028 - 0x0b) or 0x201e.
536 __ sub(r0, r0, Operand(0x2028 - 0x0b));
537 __ cmp(r0, Operand(1));
538 BranchOrBacktrack(hi, on_no_match);
539 __ bind(&done);
540 }
541 return true;
542 }
543 case 'w': {
544 // Match word character (0-9, A-Z, a-z and _).
545 Label digits, done;
546 __ cmp(current_character(), Operand('9'));
547 __ b(ls, &digits);
548 __ cmp(current_character(), Operand('_'));
549 __ b(eq, &done);
550 __ orr(r0, current_character(), Operand(0x20));
551 __ sub(r0, r0, Operand('a'));
552 __ cmp(r0, Operand('z' - 'a'));
553 BranchOrBacktrack(hi, on_no_match);
554 __ jmp(&done);
555
556 __ bind(&digits);
557 __ cmp(current_character(), Operand('0'));
558 BranchOrBacktrack(lo, on_no_match);
559 __ bind(&done);
560
561 return true;
562 }
563 case 'W': {
564 // Match non-word character (not 0-9, A-Z, a-z and _).
565 Label digits, done;
566 __ cmp(current_character(), Operand('9'));
567 __ b(ls, &digits);
568 __ cmp(current_character(), Operand('_'));
569 BranchOrBacktrack(eq, on_no_match);
570 __ orr(r0, current_character(), Operand(0x20));
571 __ sub(r0, r0, Operand('a'));
572 __ cmp(r0, Operand('z' - 'a'));
573 BranchOrBacktrack(ls, on_no_match);
574 __ jmp(&done);
575
576 __ bind(&digits);
577 __ cmp(current_character(), Operand('0'));
578 BranchOrBacktrack(hs, on_no_match);
579 __ bind(&done);
580
581 return true;
582 }
549 case '*': 583 case '*':
550 // Match any character. 584 // Match any character.
551 if (check_offset) {
552 CheckPosition(cp_offset, on_no_match);
553 }
554 return true; 585 return true;
555 // No custom implementation (yet): w, W, s(UC16), S(UC16). 586 // No custom implementation (yet): s(UC16), S(UC16).
556 default: 587 default:
557 return false; 588 return false;
558 } 589 }
559 } 590 }
560 591
561 592
562 void RegExpMacroAssemblerARM::Fail() { 593 void RegExpMacroAssemblerARM::Fail() {
563 __ mov(r0, Operand(FAILURE)); 594 __ mov(r0, Operand(FAILURE));
564 __ jmp(&exit_label_); 595 __ jmp(&exit_label_);
565 } 596 }
(...skipping 654 matching lines...) Expand 10 before | Expand all | Expand 10 after
1220 __ mov(r0, sp); 1251 __ mov(r0, sp);
1221 __ Call(r5); 1252 __ Call(r5);
1222 __ ldr(pc, MemOperand(sp, stack_alignment, PostIndex)); 1253 __ ldr(pc, MemOperand(sp, stack_alignment, PostIndex));
1223 } 1254 }
1224 1255
1225 #undef __ 1256 #undef __
1226 1257
1227 #endif // V8_NATIVE_REGEXP 1258 #endif // V8_NATIVE_REGEXP
1228 1259
1229 }} // namespace v8::internal 1260 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/arm/regexp-macro-assembler-arm.h ('k') | src/ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698