 Chromium Code Reviews
 Chromium Code Reviews Issue 547024:
  RegExp bitmap test for word character.  (Closed)
    
  
    Issue 547024:
  RegExp bitmap test for word character.  (Closed) 
  | OLD | NEW | 
|---|---|
| 1 // Copyright 2009 the V8 project authors. All rights reserved. | 1 // Copyright 2009 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright | 
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. | 
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above | 
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following | 
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided | 
| (...skipping 508 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 519 // Compare original value to 0x2028 and 0x2029, using the already | 519 // Compare original value to 0x2028 and 0x2029, using the already | 
| 520 // computed (current_char ^ 0x01 - 0x0b). I.e., check for | 520 // computed (current_char ^ 0x01 - 0x0b). I.e., check for | 
| 521 // 0x201d (0x2028 - 0x0b) or 0x201e. | 521 // 0x201d (0x2028 - 0x0b) or 0x201e. | 
| 522 __ sub(r0, r0, Operand(0x2028 - 0x0b)); | 522 __ sub(r0, r0, Operand(0x2028 - 0x0b)); | 
| 523 __ cmp(r0, Operand(1)); | 523 __ cmp(r0, Operand(1)); | 
| 524 BranchOrBacktrack(ls, on_no_match); | 524 BranchOrBacktrack(ls, on_no_match); | 
| 525 } | 525 } | 
| 526 return true; | 526 return true; | 
| 527 } | 527 } | 
| 528 case 'n': { | 528 case 'n': { | 
| 529 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) | 529 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) | 
| 530 __ eor(r0, current_character(), Operand(0x01)); | 530 __ eor(r0, current_character(), Operand(0x01)); | 
| 531 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c | 531 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c | 
| 532 __ sub(r0, r0, Operand(0x0b)); | 532 __ sub(r0, r0, Operand(0x0b)); | 
| 533 __ cmp(r0, Operand(0x0c - 0x0b)); | 533 __ cmp(r0, Operand(0x0c - 0x0b)); | 
| 534 if (mode_ == ASCII) { | 534 if (mode_ == ASCII) { | 
| 535 BranchOrBacktrack(hi, on_no_match); | 535 BranchOrBacktrack(hi, on_no_match); | 
| 536 } else { | 536 } else { | 
| 537 Label done; | 537 Label done; | 
| 538 __ b(ls, &done); | 538 __ b(ls, &done); | 
| 539 // Compare original value to 0x2028 and 0x2029, using the already | 539 // Compare original value to 0x2028 and 0x2029, using the already | 
| 540 // computed (current_char ^ 0x01 - 0x0b). I.e., check for | 540 // computed (current_char ^ 0x01 - 0x0b). I.e., check for | 
| 541 // 0x201d (0x2028 - 0x0b) or 0x201e. | 541 // 0x201d (0x2028 - 0x0b) or 0x201e. | 
| 542 __ sub(r0, r0, Operand(0x2028 - 0x0b)); | 542 __ sub(r0, r0, Operand(0x2028 - 0x0b)); | 
| 543 __ cmp(r0, Operand(1)); | 543 __ cmp(r0, Operand(1)); | 
| 544 BranchOrBacktrack(hi, on_no_match); | 544 BranchOrBacktrack(hi, on_no_match); | 
| 545 __ bind(&done); | 545 __ bind(&done); | 
| 546 } | |
| 547 return true; | |
| 548 } | 546 } | 
| 547 return true; | |
| 548 } | |
| 549 case 'w': { | 549 case 'w': { | 
| 550 // Match word character (0-9, A-Z, a-z and _). | 550 if (mode_ != ASCII) { | 
| 551 Label digits, done; | 551 // Table is 128 bits, so all ASCII characters can be tested. | 
| 
Erik Corry
2010/01/15 12:02:09
The table is 128 bytes, not 128 bits.
 | |
| 552 __ cmp(current_character(), Operand('9')); | 552 __ cmp(current_character(), Operand('z')); | 
| 553 __ b(ls, &digits); | 553 BranchOrBacktrack(hi, on_no_match); | 
| 554 __ cmp(current_character(), Operand('_')); | 554 } | 
| 555 __ b(eq, &done); | 555 ExternalReference map = ExternalReference::re_word_character_map(); | 
| 556 __ orr(r0, current_character(), Operand(0x20)); | 556 __ mov(r0, Operand(map)); | 
| 557 __ sub(r0, r0, Operand('a')); | 557 __ ldrb(r0, MemOperand(r0, current_character())); | 
| 558 __ cmp(r0, Operand('z' - 'a')); | 558 __ tst(r0, Operand(r0)); | 
| 559 BranchOrBacktrack(hi, on_no_match); | 559 BranchOrBacktrack(eq, on_no_match); | 
| 560 __ jmp(&done); | |
| 561 | |
| 562 __ bind(&digits); | |
| 563 __ cmp(current_character(), Operand('0')); | |
| 564 BranchOrBacktrack(lo, on_no_match); | |
| 565 __ bind(&done); | |
| 566 | |
| 567 return true; | 560 return true; | 
| 568 } | 561 } | 
| 569 case 'W': { | 562 case 'W': { | 
| 570 // Match non-word character (not 0-9, A-Z, a-z and _). | 563 Label done; | 
| 571 Label digits, done; | 564 if (mode_ != ASCII) { | 
| 572 __ cmp(current_character(), Operand('9')); | 565 // Table is 128 bits, so all ASCII characters can be tested. | 
| 573 __ b(ls, &digits); | 566 __ cmp(current_character(), Operand('z')); | 
| 574 __ cmp(current_character(), Operand('_')); | 567 __ b(hi, &done); | 
| 575 BranchOrBacktrack(eq, on_no_match); | 568 } | 
| 576 __ orr(r0, current_character(), Operand(0x20)); | 569 ExternalReference map = ExternalReference::re_word_character_map(); | 
| 577 __ sub(r0, r0, Operand('a')); | 570 __ mov(r0, Operand(map)); | 
| 578 __ cmp(r0, Operand('z' - 'a')); | 571 __ ldrb(r0, MemOperand(r0, current_character())); | 
| 579 BranchOrBacktrack(ls, on_no_match); | 572 __ tst(r0, Operand(r0)); | 
| 580 __ jmp(&done); | 573 BranchOrBacktrack(ne, on_no_match); | 
| 581 | 574 if (mode_ != ASCII) { | 
| 582 __ bind(&digits); | 575 __ bind(&done); | 
| 583 __ cmp(current_character(), Operand('0')); | 576 } | 
| 584 BranchOrBacktrack(hs, on_no_match); | |
| 585 __ bind(&done); | |
| 586 | |
| 587 return true; | 577 return true; | 
| 588 } | 578 } | 
| 589 case '*': | 579 case '*': | 
| 590 // Match any character. | 580 // Match any character. | 
| 591 return true; | 581 return true; | 
| 592 // No custom implementation (yet): s(UC16), S(UC16). | 582 // No custom implementation (yet): s(UC16), S(UC16). | 
| 593 default: | 583 default: | 
| 594 return false; | 584 return false; | 
| 595 } | 585 } | 
| 596 } | 586 } | 
| (...skipping 660 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1257 __ mov(r0, sp); | 1247 __ mov(r0, sp); | 
| 1258 __ Call(r5); | 1248 __ Call(r5); | 
| 1259 __ ldr(pc, MemOperand(sp, stack_alignment, PostIndex)); | 1249 __ ldr(pc, MemOperand(sp, stack_alignment, PostIndex)); | 
| 1260 } | 1250 } | 
| 1261 | 1251 | 
| 1262 #undef __ | 1252 #undef __ | 
| 1263 | 1253 | 
| 1264 #endif // V8_NATIVE_REGEXP | 1254 #endif // V8_NATIVE_REGEXP | 
| 1265 | 1255 | 
| 1266 }} // namespace v8::internal | 1256 }} // namespace v8::internal | 
| OLD | NEW |