src/x64/regexp-macro-assembler-x64.cc - Issue 559913002: Rename ascii to one-byte where applicable.

Side by Side Diff: src/x64/regexp-macro-assembler-x64.cc

Issue 559913002: Rename ascii to one-byte where applicable. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/v8.h"	5 #include "src/v8.h"

6	6

7 #if V8_TARGET_ARCH_X64	7 #if V8_TARGET_ARCH_X64

8	8

9 #include "src/cpu-profiler.h"	9 #include "src/cpu-profiler.h"

10 #include "src/log.h"	10 #include "src/log.h"

11 #include "src/macro-assembler.h"	11 #include "src/macro-assembler.h"

12 #include "src/regexp-macro-assembler.h"	12 #include "src/regexp-macro-assembler.h"

13 #include "src/regexp-stack.h"	13 #include "src/regexp-stack.h"

14 #include "src/serialize.h"	14 #include "src/serialize.h"

15 #include "src/unicode.h"	15 #include "src/unicode.h"

16 #include "src/x64/regexp-macro-assembler-x64.h"	16 #include "src/x64/regexp-macro-assembler-x64.h"

17	17

18 namespace v8 {	18 namespace v8 {

19 namespace internal {	19 namespace internal {

20	20

21 #ifndef V8_INTERPRETED_REGEXP	21 #ifndef V8_INTERPRETED_REGEXP

22	22

23 /*	23 /*

24 * This assembler uses the following register assignment convention	24 * This assembler uses the following register assignment convention

25 * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded	25 * - rdx : Currently loaded character(s) as Latin1 or UC16. Must be loaded

26 * using LoadCurrentCharacter before using any of the dispatch methods.	26 * using LoadCurrentCharacter before using any of the dispatch methods.

27 * Temporarily stores the index of capture start after a matching pass	27 * Temporarily stores the index of capture start after a matching pass

28 * for a global regexp.	28 * for a global regexp.

29 * - rdi : Current position in input, as negative offset from end of string.	29 * - rdi : Current position in input, as negative offset from end of string.

30 * Please notice that this is the byte offset, not the character	30 * Please notice that this is the byte offset, not the character

31 * offset! Is always a 32-bit signed (negative) offset, but must be	31 * offset! Is always a 32-bit signed (negative) offset, but must be

32 * maintained sign-extended to 64 bits, since it is used as index.	32 * maintained sign-extended to 64 bits, since it is used as index.

33 * - rsi : End of input (points to byte after last character in input),	33 * - rsi : End of input (points to byte after last character in input),

34 * so that rsi+rdi points to the current character.	34 * so that rsi+rdi points to the current character.

35 * - rbp : Frame pointer. Used to access arguments, local variables and	35 * - rbp : Frame pointer. Used to access arguments, local variables and

(...skipping 201 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
237 __ j(equal, &fallthrough);	237 __ j(equal, &fallthrough);

238	238

239 // -----------------------	239 // -----------------------

240 // rdx - Start of capture	240 // rdx - Start of capture

241 // rbx - length of capture	241 // rbx - length of capture

242 // Check that there are sufficient characters left in the input.	242 // Check that there are sufficient characters left in the input.

243 __ movl(rax, rdi);	243 __ movl(rax, rdi);

244 __ addl(rax, rbx);	244 __ addl(rax, rbx);

245 BranchOrBacktrack(greater, on_no_match);	245 BranchOrBacktrack(greater, on_no_match);

246	246

247 if (mode_ == ASCII) {	247 if (mode_ == LATIN1) {

248 Label loop_increment;	248 Label loop_increment;

249 if (on_no_match == NULL) {	249 if (on_no_match == NULL) {

250 on_no_match = &backtrack_label_;	250 on_no_match = &backtrack_label_;

251 }	251 }

252	252

253 __ leap(r9, Operand(rsi, rdx, times_1, 0));	253 __ leap(r9, Operand(rsi, rdx, times_1, 0));

254 __ leap(r11, Operand(rsi, rdi, times_1, 0));	254 __ leap(r11, Operand(rsi, rdi, times_1, 0));

255 __ addp(rbx, r9); // End of capture	255 __ addp(rbx, r9); // End of capture

256 // ---------------------	256 // ---------------------

257 // r11 - current input character address	257 // r11 - current input character address

(...skipping 135 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
393 __ addp(rdx, rsi); // Start of capture.	393 __ addp(rdx, rsi); // Start of capture.

394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture	394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture

395	395

396 // -----------------------	396 // -----------------------

397 // rdx - current capture character address.	397 // rdx - current capture character address.

398 // rbx - current input character address.	398 // rbx - current input character address.

399 // r9 - end of input to match (capture length after rbx).	399 // r9 - end of input to match (capture length after rbx).

400	400

401 Label loop;	401 Label loop;

402 __ bind(&loop);	402 __ bind(&loop);

403 if (mode_ == ASCII) {	403 if (mode_ == LATIN1) {

404 __ movzxbl(rax, Operand(rdx, 0));	404 __ movzxbl(rax, Operand(rdx, 0));

405 __ cmpb(rax, Operand(rbx, 0));	405 __ cmpb(rax, Operand(rbx, 0));

406 } else {	406 } else {

407 DCHECK(mode_ == UC16);	407 DCHECK(mode_ == UC16);

408 __ movzxwl(rax, Operand(rdx, 0));	408 __ movzxwl(rax, Operand(rdx, 0));

409 __ cmpw(rax, Operand(rbx, 0));	409 __ cmpw(rax, Operand(rbx, 0));

410 }	410 }

411 BranchOrBacktrack(not_equal, on_no_match);	411 BranchOrBacktrack(not_equal, on_no_match);

412 // Increment pointers into capture and match string.	412 // Increment pointers into capture and match string.

413 __ addp(rbx, Immediate(char_size()));	413 __ addp(rbx, Immediate(char_size()));

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
491 __ cmpl(rax, Immediate(to - from));	491 __ cmpl(rax, Immediate(to - from));

492 BranchOrBacktrack(above, on_not_in_range);	492 BranchOrBacktrack(above, on_not_in_range);

493 }	493 }

494	494

495	495

496 void RegExpMacroAssemblerX64::CheckBitInTable(	496 void RegExpMacroAssemblerX64::CheckBitInTable(

497 Handle<ByteArray> table,	497 Handle<ByteArray> table,

498 Label* on_bit_set) {	498 Label* on_bit_set) {

499 __ Move(rax, table);	499 __ Move(rax, table);

500 Register index = current_character();	500 Register index = current_character();

501 if (mode_ != ASCII \|\| kTableMask != String::kMaxOneByteCharCode) {	501 if (mode_ != LATIN1 \|\| kTableMask != String::kMaxOneByteCharCode) {

502 __ movp(rbx, current_character());	502 __ movp(rbx, current_character());

503 __ andp(rbx, Immediate(kTableMask));	503 __ andp(rbx, Immediate(kTableMask));

504 index = rbx;	504 index = rbx;

505 }	505 }

506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize),	506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize),

507 Immediate(0));	507 Immediate(0));

508 BranchOrBacktrack(not_equal, on_bit_set);	508 BranchOrBacktrack(not_equal, on_bit_set);

509 }	509 }

510	510

511	511

512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,	512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,

513 Label* on_no_match) {	513 Label* on_no_match) {

514 // Range checks (c in min..max) are generally implemented by an unsigned	514 // Range checks (c in min..max) are generally implemented by an unsigned

515 // (c - min) <= (max - min) check, using the sequence:	515 // (c - min) <= (max - min) check, using the sequence:

516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))	516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))

517 // cmp(rax, Immediate(max - min))	517 // cmp(rax, Immediate(max - min))

518 switch (type) {	518 switch (type) {

519 case 's':	519 case 's':

520 // Match space-characters	520 // Match space-characters

521 if (mode_ == ASCII) {	521 if (mode_ == LATIN1) {

522 // One byte space characters are '\t'..'\r', ' ' and \u00a0.	522 // One byte space characters are '\t'..'\r', ' ' and \u00a0.

523 Label success;	523 Label success;

524 __ cmpl(current_character(), Immediate(' '));	524 __ cmpl(current_character(), Immediate(' '));

525 __ j(equal, &success, Label::kNear);	525 __ j(equal, &success, Label::kNear);

526 // Check range 0x09..0x0d	526 // Check range 0x09..0x0d

527 __ leap(rax, Operand(current_character(), -'\t'));	527 __ leap(rax, Operand(current_character(), -'\t'));

528 __ cmpl(rax, Immediate('\r' - '\t'));	528 __ cmpl(rax, Immediate('\r' - '\t'));

529 __ j(below_equal, &success, Label::kNear);	529 __ j(below_equal, &success, Label::kNear);

530 // \u00a0 (NBSP).	530 // \u00a0 (NBSP).

531 __ cmpl(rax, Immediate(0x00a0 - '\t'));	531 __ cmpl(rax, Immediate(0x00a0 - '\t'));

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
567 }	567 }

568 return true;	568 return true;

569 }	569 }

570 case 'n': {	570 case 'n': {

571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)	571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)

572 __ movl(rax, current_character());	572 __ movl(rax, current_character());

573 __ xorp(rax, Immediate(0x01));	573 __ xorp(rax, Immediate(0x01));

574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c	574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c

575 __ subl(rax, Immediate(0x0b));	575 __ subl(rax, Immediate(0x0b));

576 __ cmpl(rax, Immediate(0x0c - 0x0b));	576 __ cmpl(rax, Immediate(0x0c - 0x0b));

577 if (mode_ == ASCII) {	577 if (mode_ == LATIN1) {

578 BranchOrBacktrack(above, on_no_match);	578 BranchOrBacktrack(above, on_no_match);

579 } else {	579 } else {

580 Label done;	580 Label done;

581 BranchOrBacktrack(below_equal, &done);	581 BranchOrBacktrack(below_equal, &done);

582 // Compare original value to 0x2028 and 0x2029, using the already	582 // Compare original value to 0x2028 and 0x2029, using the already

583 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for	583 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for

584 // 0x201d (0x2028 - 0x0b) or 0x201e.	584 // 0x201d (0x2028 - 0x0b) or 0x201e.

585 __ subl(rax, Immediate(0x2028 - 0x0b));	585 __ subl(rax, Immediate(0x2028 - 0x0b));

586 __ cmpl(rax, Immediate(0x2029 - 0x2028));	586 __ cmpl(rax, Immediate(0x2029 - 0x2028));

587 BranchOrBacktrack(above, on_no_match);	587 BranchOrBacktrack(above, on_no_match);

588 __ bind(&done);	588 __ bind(&done);

589 }	589 }

590 return true;	590 return true;

591 }	591 }

592 case 'w': {	592 case 'w': {

593 if (mode_ != ASCII) {	593 if (mode_ != LATIN1) {

594 // Table is 128 entries, so all ASCII characters can be tested.	594 // Table is 256 entries, so all Latin1 characters can be tested.

595 __ cmpl(current_character(), Immediate('z'));	595 __ cmpl(current_character(), Immediate('z'));

596 BranchOrBacktrack(above, on_no_match);	596 BranchOrBacktrack(above, on_no_match);

597 }	597 }

598 __ Move(rbx, ExternalReference::re_word_character_map());	598 __ Move(rbx, ExternalReference::re_word_character_map());

599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.	599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.

600 __ testb(Operand(rbx, current_character(), times_1, 0),	600 __ testb(Operand(rbx, current_character(), times_1, 0),

601 current_character());	601 current_character());

602 BranchOrBacktrack(zero, on_no_match);	602 BranchOrBacktrack(zero, on_no_match);

603 return true;	603 return true;

604 }	604 }

605 case 'W': {	605 case 'W': {

606 Label done;	606 Label done;

607 if (mode_ != ASCII) {	607 if (mode_ != LATIN1) {

608 // Table is 128 entries, so all ASCII characters can be tested.	608 // Table is 256 entries, so all Latin1 characters can be tested.

609 __ cmpl(current_character(), Immediate('z'));	609 __ cmpl(current_character(), Immediate('z'));

610 __ j(above, &done);	610 __ j(above, &done);

611 }	611 }

612 __ Move(rbx, ExternalReference::re_word_character_map());	612 __ Move(rbx, ExternalReference::re_word_character_map());

613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.	613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.

614 __ testb(Operand(rbx, current_character(), times_1, 0),	614 __ testb(Operand(rbx, current_character(), times_1, 0),

615 current_character());	615 current_character());

616 BranchOrBacktrack(not_zero, on_no_match);	616 BranchOrBacktrack(not_zero, on_no_match);

617 if (mode_ != ASCII) {	617 if (mode_ != LATIN1) {

618 __ bind(&done);	618 __ bind(&done);

619 }	619 }

620 return true;	620 return true;

621 }	621 }

622	622

623 case '*':	623 case '*':

624 // Match any character.	624 // Match any character.

625 return true;	625 return true;

626 // No custom implementation (yet): s(UC16), S(UC16).	626 // No custom implementation (yet): s(UC16), S(UC16).

627 default:	627 default:

(...skipping 570 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1198 return RETRY;	1198 return RETRY;

1199 }	1199 }

1200	1200

1201 // Prepare for possible GC.	1201 // Prepare for possible GC.

1202 HandleScope handles(isolate);	1202 HandleScope handles(isolate);

1203 Handle<Code> code_handle(re_code);	1203 Handle<Code> code_handle(re_code);

1204	1204

1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString));	1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString));

1206	1206

1207 // Current string.	1207 // Current string.

1208 bool is_ascii = subject->IsOneByteRepresentationUnderneath();	1208 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();

1209	1209

1210 DCHECK(re_code->instruction_start() <= *return_address);	1210 DCHECK(re_code->instruction_start() <= *return_address);

1211 DCHECK(*return_address <=	1211 DCHECK(*return_address <=

1212 re_code->instruction_start() + re_code->instruction_size());	1212 re_code->instruction_start() + re_code->instruction_size());

1213	1213

1214 Object* result = isolate->stack_guard()->HandleInterrupts();	1214 Object* result = isolate->stack_guard()->HandleInterrupts();

1215	1215

1216 if (*code_handle != re_code) { // Return address no longer valid	1216 if (*code_handle != re_code) { // Return address no longer valid

1217 intptr_t delta = code_handle->address() - re_code->address();	1217 intptr_t delta = code_handle->address() - re_code->address();

1218 // Overwrite the return address on the stack.	1218 // Overwrite the return address on the stack.

(...skipping 10 matching lines...) Expand all Loading...
1229 // Extract the underlying string and the slice offset.	1229 // Extract the underlying string and the slice offset.

1230 if (StringShape(*subject_tmp).IsCons()) {	1230 if (StringShape(*subject_tmp).IsCons()) {

1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());	1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());

1232 } else if (StringShape(*subject_tmp).IsSliced()) {	1232 } else if (StringShape(*subject_tmp).IsSliced()) {

1233 SlicedString* slice = SlicedString::cast(*subject_tmp);	1233 SlicedString* slice = SlicedString::cast(*subject_tmp);

1234 subject_tmp = Handle<String>(slice->parent());	1234 subject_tmp = Handle<String>(slice->parent());

1235 slice_offset = slice->offset();	1235 slice_offset = slice->offset();

1236 }	1236 }

1237	1237

1238 // String might have changed.	1238 // String might have changed.

1239 if (subject_tmp->IsOneByteRepresentation() != is_ascii) {	1239 if (subject_tmp->IsOneByteRepresentation() != is_one_byte) {

1240 // If we changed between an ASCII and a UC16 string, the specialized	1240 // If we changed between a Latin1 and a UC16 string, the specialized

1241 // code cannot be used, and we need to restart regexp matching from	1241 // code cannot be used, and we need to restart regexp matching from

1242 // scratch (including, potentially, compiling a new version of the code).	1242 // scratch (including, potentially, compiling a new version of the code).

1243 return RETRY;	1243 return RETRY;

1244 }	1244 }

1245	1245

1246 // Otherwise, the content of the string might have moved. It must still	1246 // Otherwise, the content of the string might have moved. It must still

1247 // be a sequential or external string with the same content.	1247 // be a sequential or external string with the same content.

1248 // Update the start and end pointers in the stack frame to the current	1248 // Update the start and end pointers in the stack frame to the current

1249 // location (whether it has actually moved or not).	1249 // location (whether it has actually moved or not).

1250 DCHECK(StringShape(*subject_tmp).IsSequential() \|\|	1250 DCHECK(StringShape(*subject_tmp).IsSequential() \|\|

(...skipping 155 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1406 __ j(above, &no_stack_overflow);	1406 __ j(above, &no_stack_overflow);

1407	1407

1408 SafeCall(&stack_overflow_label_);	1408 SafeCall(&stack_overflow_label_);

1409	1409

1410 __ bind(&no_stack_overflow);	1410 __ bind(&no_stack_overflow);

1411 }	1411 }

1412	1412

1413	1413

1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,	1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,

1415 int characters) {	1415 int characters) {

1416 if (mode_ == ASCII) {	1416 if (mode_ == LATIN1) {

1417 if (characters == 4) {	1417 if (characters == 4) {

1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));	1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));

1419 } else if (characters == 2) {	1419 } else if (characters == 2) {

1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));	1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));

1421 } else {	1421 } else {

1422 DCHECK(characters == 1);	1422 DCHECK(characters == 1);

1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));	1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));

1424 }	1424 }

1425 } else {	1425 } else {

1426 DCHECK(mode_ == UC16);	1426 DCHECK(mode_ == UC16);

1427 if (characters == 2) {	1427 if (characters == 2) {

1428 __ movl(current_character(),	1428 __ movl(current_character(),

1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));	1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));

1430 } else {	1430 } else {

1431 DCHECK(characters == 1);	1431 DCHECK(characters == 1);

1432 __ movzxwl(current_character(),	1432 __ movzxwl(current_character(),

1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));	1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));

1434 }	1434 }

1435 }	1435 }

1436 }	1436 }

1437	1437

1438 #undef __	1438 #undef __

1439	1439

1440 #endif // V8_INTERPRETED_REGEXP	1440 #endif // V8_INTERPRETED_REGEXP

1441	1441

1442 }} // namespace v8::internal	1442 }} // namespace v8::internal

1443	1443

1444 #endif // V8_TARGET_ARCH_X64	1444 #endif // V8_TARGET_ARCH_X64

OLD	NEW

« src/jsregexp.cc ('K') | « src/x64/regexp-macro-assembler-x64.h ('k') | test/cctest/test-alloc.cc » ('j') | no next file with comments »