OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #if V8_TARGET_ARCH_X64 | 7 #if V8_TARGET_ARCH_X64 |
8 | 8 |
9 #include "src/cpu-profiler.h" | 9 #include "src/cpu-profiler.h" |
10 #include "src/log.h" | 10 #include "src/log.h" |
11 #include "src/macro-assembler.h" | 11 #include "src/macro-assembler.h" |
12 #include "src/regexp-macro-assembler.h" | 12 #include "src/regexp-macro-assembler.h" |
13 #include "src/regexp-stack.h" | 13 #include "src/regexp-stack.h" |
14 #include "src/serialize.h" | 14 #include "src/serialize.h" |
15 #include "src/unicode.h" | 15 #include "src/unicode.h" |
16 #include "src/x64/regexp-macro-assembler-x64.h" | 16 #include "src/x64/regexp-macro-assembler-x64.h" |
17 | 17 |
18 namespace v8 { | 18 namespace v8 { |
19 namespace internal { | 19 namespace internal { |
20 | 20 |
21 #ifndef V8_INTERPRETED_REGEXP | 21 #ifndef V8_INTERPRETED_REGEXP |
22 | 22 |
23 /* | 23 /* |
24 * This assembler uses the following register assignment convention | 24 * This assembler uses the following register assignment convention |
25 * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded | 25 * - rdx : Currently loaded character(s) as Latin1 or UC16. Must be loaded |
26 * using LoadCurrentCharacter before using any of the dispatch methods. | 26 * using LoadCurrentCharacter before using any of the dispatch methods. |
27 * Temporarily stores the index of capture start after a matching pass | 27 * Temporarily stores the index of capture start after a matching pass |
28 * for a global regexp. | 28 * for a global regexp. |
29 * - rdi : Current position in input, as negative offset from end of string. | 29 * - rdi : Current position in input, as negative offset from end of string. |
30 * Please notice that this is the byte offset, not the character | 30 * Please notice that this is the byte offset, not the character |
31 * offset! Is always a 32-bit signed (negative) offset, but must be | 31 * offset! Is always a 32-bit signed (negative) offset, but must be |
32 * maintained sign-extended to 64 bits, since it is used as index. | 32 * maintained sign-extended to 64 bits, since it is used as index. |
33 * - rsi : End of input (points to byte after last character in input), | 33 * - rsi : End of input (points to byte after last character in input), |
34 * so that rsi+rdi points to the current character. | 34 * so that rsi+rdi points to the current character. |
35 * - rbp : Frame pointer. Used to access arguments, local variables and | 35 * - rbp : Frame pointer. Used to access arguments, local variables and |
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
237 __ j(equal, &fallthrough); | 237 __ j(equal, &fallthrough); |
238 | 238 |
239 // ----------------------- | 239 // ----------------------- |
240 // rdx - Start of capture | 240 // rdx - Start of capture |
241 // rbx - length of capture | 241 // rbx - length of capture |
242 // Check that there are sufficient characters left in the input. | 242 // Check that there are sufficient characters left in the input. |
243 __ movl(rax, rdi); | 243 __ movl(rax, rdi); |
244 __ addl(rax, rbx); | 244 __ addl(rax, rbx); |
245 BranchOrBacktrack(greater, on_no_match); | 245 BranchOrBacktrack(greater, on_no_match); |
246 | 246 |
247 if (mode_ == ASCII) { | 247 if (mode_ == LATIN1) { |
248 Label loop_increment; | 248 Label loop_increment; |
249 if (on_no_match == NULL) { | 249 if (on_no_match == NULL) { |
250 on_no_match = &backtrack_label_; | 250 on_no_match = &backtrack_label_; |
251 } | 251 } |
252 | 252 |
253 __ leap(r9, Operand(rsi, rdx, times_1, 0)); | 253 __ leap(r9, Operand(rsi, rdx, times_1, 0)); |
254 __ leap(r11, Operand(rsi, rdi, times_1, 0)); | 254 __ leap(r11, Operand(rsi, rdi, times_1, 0)); |
255 __ addp(rbx, r9); // End of capture | 255 __ addp(rbx, r9); // End of capture |
256 // --------------------- | 256 // --------------------- |
257 // r11 - current input character address | 257 // r11 - current input character address |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
393 __ addp(rdx, rsi); // Start of capture. | 393 __ addp(rdx, rsi); // Start of capture. |
394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture | 394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture |
395 | 395 |
396 // ----------------------- | 396 // ----------------------- |
397 // rdx - current capture character address. | 397 // rdx - current capture character address. |
398 // rbx - current input character address. | 398 // rbx - current input character address. |
399 // r9 - end of capture (capture length after rdx). | 399 // r9 - end of capture (capture length after rdx). |
400 | 400 |
401 Label loop; | 401 Label loop; |
402 __ bind(&loop); | 402 __ bind(&loop); |
403 if (mode_ == ASCII) { | 403 if (mode_ == LATIN1) { |
404 __ movzxbl(rax, Operand(rdx, 0)); | 404 __ movzxbl(rax, Operand(rdx, 0)); |
405 __ cmpb(rax, Operand(rbx, 0)); | 405 __ cmpb(rax, Operand(rbx, 0)); |
406 } else { | 406 } else { |
407 DCHECK(mode_ == UC16); | 407 DCHECK(mode_ == UC16); |
408 __ movzxwl(rax, Operand(rdx, 0)); | 408 __ movzxwl(rax, Operand(rdx, 0)); |
409 __ cmpw(rax, Operand(rbx, 0)); | 409 __ cmpw(rax, Operand(rbx, 0)); |
410 } | 410 } |
411 BranchOrBacktrack(not_equal, on_no_match); | 411 BranchOrBacktrack(not_equal, on_no_match); |
412 // Increment pointers into capture and match string. | 412 // Increment pointers into capture and match string. |
413 __ addp(rbx, Immediate(char_size())); | 413 __ addp(rbx, Immediate(char_size())); |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
491 __ cmpl(rax, Immediate(to - from)); | 491 __ cmpl(rax, Immediate(to - from)); |
492 BranchOrBacktrack(above, on_not_in_range); | 492 BranchOrBacktrack(above, on_not_in_range); |
493 } | 493 } |
494 | 494 |
495 | 495 |
496 void RegExpMacroAssemblerX64::CheckBitInTable( | 496 void RegExpMacroAssemblerX64::CheckBitInTable( |
497 Handle<ByteArray> table, | 497 Handle<ByteArray> table, |
498 Label* on_bit_set) { | 498 Label* on_bit_set) { |
499 __ Move(rax, table); | 499 __ Move(rax, table); |
500 Register index = current_character(); | 500 Register index = current_character(); |
501 if (mode_ != ASCII || kTableMask != String::kMaxOneByteCharCode) { | 501 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { |
502 __ movp(rbx, current_character()); | 502 __ movp(rbx, current_character()); |
503 __ andp(rbx, Immediate(kTableMask)); | 503 __ andp(rbx, Immediate(kTableMask)); |
504 index = rbx; | 504 index = rbx; |
505 } | 505 } |
506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize), | 506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize), |
507 Immediate(0)); | 507 Immediate(0)); |
508 BranchOrBacktrack(not_equal, on_bit_set); | 508 BranchOrBacktrack(not_equal, on_bit_set); |
509 } | 509 } |
510 | 510 |
511 | 511 |
512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, | 512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, |
513 Label* on_no_match) { | 513 Label* on_no_match) { |
514 // Range checks (c in min..max) are generally implemented by an unsigned | 514 // Range checks (c in min..max) are generally implemented by an unsigned |
515 // (c - min) <= (max - min) check, using the sequence: | 515 // (c - min) <= (max - min) check, using the sequence: |
516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min)) | 516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min)) |
517 // cmp(rax, Immediate(max - min)) | 517 // cmp(rax, Immediate(max - min)) |
518 switch (type) { | 518 switch (type) { |
519 case 's': | 519 case 's': |
520 // Match space-characters | 520 // Match space-characters |
521 if (mode_ == ASCII) { | 521 if (mode_ == LATIN1) { |
522 // One byte space characters are '\t'..'\r', ' ' and \u00a0. | 522 // One byte space characters are '\t'..'\r', ' ' and \u00a0. |
523 Label success; | 523 Label success; |
524 __ cmpl(current_character(), Immediate(' ')); | 524 __ cmpl(current_character(), Immediate(' ')); |
525 __ j(equal, &success, Label::kNear); | 525 __ j(equal, &success, Label::kNear); |
526 // Check range 0x09..0x0d | 526 // Check range 0x09..0x0d |
527 __ leap(rax, Operand(current_character(), -'\t')); | 527 __ leap(rax, Operand(current_character(), -'\t')); |
528 __ cmpl(rax, Immediate('\r' - '\t')); | 528 __ cmpl(rax, Immediate('\r' - '\t')); |
529 __ j(below_equal, &success, Label::kNear); | 529 __ j(below_equal, &success, Label::kNear); |
530 // \u00a0 (NBSP). | 530 // \u00a0 (NBSP). |
531 __ cmpl(rax, Immediate(0x00a0 - '\t')); | 531 __ cmpl(rax, Immediate(0x00a0 - '\t')); |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
567 } | 567 } |
568 return true; | 568 return true; |
569 } | 569 } |
570 case 'n': { | 570 case 'n': { |
571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) | 571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) |
572 __ movl(rax, current_character()); | 572 __ movl(rax, current_character()); |
573 __ xorp(rax, Immediate(0x01)); | 573 __ xorp(rax, Immediate(0x01)); |
574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c | 574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c |
575 __ subl(rax, Immediate(0x0b)); | 575 __ subl(rax, Immediate(0x0b)); |
576 __ cmpl(rax, Immediate(0x0c - 0x0b)); | 576 __ cmpl(rax, Immediate(0x0c - 0x0b)); |
577 if (mode_ == ASCII) { | 577 if (mode_ == LATIN1) { |
578 BranchOrBacktrack(above, on_no_match); | 578 BranchOrBacktrack(above, on_no_match); |
579 } else { | 579 } else { |
580 Label done; | 580 Label done; |
581 BranchOrBacktrack(below_equal, &done); | 581 BranchOrBacktrack(below_equal, &done); |
582 // Compare original value to 0x2028 and 0x2029, using the already | 582 // Compare original value to 0x2028 and 0x2029, using the already |
583 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for | 583 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for |
584 // 0x201d (0x2028 - 0x0b) or 0x201e. | 584 // 0x201d (0x2028 - 0x0b) or 0x201e. |
585 __ subl(rax, Immediate(0x2028 - 0x0b)); | 585 __ subl(rax, Immediate(0x2028 - 0x0b)); |
586 __ cmpl(rax, Immediate(0x2029 - 0x2028)); | 586 __ cmpl(rax, Immediate(0x2029 - 0x2028)); |
587 BranchOrBacktrack(above, on_no_match); | 587 BranchOrBacktrack(above, on_no_match); |
588 __ bind(&done); | 588 __ bind(&done); |
589 } | 589 } |
590 return true; | 590 return true; |
591 } | 591 } |
592 case 'w': { | 592 case 'w': { |
593 if (mode_ != ASCII) { | 593 if (mode_ != LATIN1) { |
594 // Table is 128 entries, so all ASCII characters can be tested. | 594 // Table is 256 entries, so all Latin1 characters can be tested. |
595 __ cmpl(current_character(), Immediate('z')); | 595 __ cmpl(current_character(), Immediate('z')); |
596 BranchOrBacktrack(above, on_no_match); | 596 BranchOrBacktrack(above, on_no_match); |
597 } | 597 } |
598 __ Move(rbx, ExternalReference::re_word_character_map()); | 598 __ Move(rbx, ExternalReference::re_word_character_map()); |
599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. | 599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. |
600 __ testb(Operand(rbx, current_character(), times_1, 0), | 600 __ testb(Operand(rbx, current_character(), times_1, 0), |
601 current_character()); | 601 current_character()); |
602 BranchOrBacktrack(zero, on_no_match); | 602 BranchOrBacktrack(zero, on_no_match); |
603 return true; | 603 return true; |
604 } | 604 } |
605 case 'W': { | 605 case 'W': { |
606 Label done; | 606 Label done; |
607 if (mode_ != ASCII) { | 607 if (mode_ != LATIN1) { |
608 // Table is 128 entries, so all ASCII characters can be tested. | 608 // Table is 256 entries, so all Latin1 characters can be tested. |
609 __ cmpl(current_character(), Immediate('z')); | 609 __ cmpl(current_character(), Immediate('z')); |
610 __ j(above, &done); | 610 __ j(above, &done); |
611 } | 611 } |
612 __ Move(rbx, ExternalReference::re_word_character_map()); | 612 __ Move(rbx, ExternalReference::re_word_character_map()); |
613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. | 613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. |
614 __ testb(Operand(rbx, current_character(), times_1, 0), | 614 __ testb(Operand(rbx, current_character(), times_1, 0), |
615 current_character()); | 615 current_character()); |
616 BranchOrBacktrack(not_zero, on_no_match); | 616 BranchOrBacktrack(not_zero, on_no_match); |
617 if (mode_ != ASCII) { | 617 if (mode_ != LATIN1) { |
618 __ bind(&done); | 618 __ bind(&done); |
619 } | 619 } |
620 return true; | 620 return true; |
621 } | 621 } |
622 | 622 |
623 case '*': | 623 case '*': |
624 // Match any character. | 624 // Match any character. |
625 return true; | 625 return true; |
626 // No custom implementation (yet): s(UC16), S(UC16). | 626 // No custom implementation (yet): s(UC16), S(UC16). |
627 default: | 627 default: |
(...skipping 570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1198 return RETRY; | 1198 return RETRY; |
1199 } | 1199 } |
1200 | 1200 |
1201 // Prepare for possible GC. | 1201 // Prepare for possible GC. |
1202 HandleScope handles(isolate); | 1202 HandleScope handles(isolate); |
1203 Handle<Code> code_handle(re_code); | 1203 Handle<Code> code_handle(re_code); |
1204 | 1204 |
1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString)); | 1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString)); |
1206 | 1206 |
1207 // Current string. | 1207 // Current string. |
1208 bool is_ascii = subject->IsOneByteRepresentationUnderneath(); | 1208 bool is_one_byte = subject->IsOneByteRepresentationUnderneath(); |
1209 | 1209 |
1210 DCHECK(re_code->instruction_start() <= *return_address); | 1210 DCHECK(re_code->instruction_start() <= *return_address); |
1211 DCHECK(*return_address <= | 1211 DCHECK(*return_address <= |
1212 re_code->instruction_start() + re_code->instruction_size()); | 1212 re_code->instruction_start() + re_code->instruction_size()); |
1213 | 1213 |
1214 Object* result = isolate->stack_guard()->HandleInterrupts(); | 1214 Object* result = isolate->stack_guard()->HandleInterrupts(); |
1215 | 1215 |
1216 if (*code_handle != re_code) { // Return address no longer valid | 1216 if (*code_handle != re_code) { // Return address no longer valid |
1217 intptr_t delta = code_handle->address() - re_code->address(); | 1217 intptr_t delta = code_handle->address() - re_code->address(); |
1218 // Overwrite the return address on the stack. | 1218 // Overwrite the return address on the stack. |
(...skipping 10 matching lines...) Expand all Loading... |
1229 // Extract the underlying string and the slice offset. | 1229 // Extract the underlying string and the slice offset. |
1230 if (StringShape(*subject_tmp).IsCons()) { | 1230 if (StringShape(*subject_tmp).IsCons()) { |
1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first()); | 1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first()); |
1232 } else if (StringShape(*subject_tmp).IsSliced()) { | 1232 } else if (StringShape(*subject_tmp).IsSliced()) { |
1233 SlicedString* slice = SlicedString::cast(*subject_tmp); | 1233 SlicedString* slice = SlicedString::cast(*subject_tmp); |
1234 subject_tmp = Handle<String>(slice->parent()); | 1234 subject_tmp = Handle<String>(slice->parent()); |
1235 slice_offset = slice->offset(); | 1235 slice_offset = slice->offset(); |
1236 } | 1236 } |
1237 | 1237 |
1238 // String might have changed. | 1238 // String might have changed. |
1239 if (subject_tmp->IsOneByteRepresentation() != is_ascii) { | 1239 if (subject_tmp->IsOneByteRepresentation() != is_one_byte) { |
1240 // If we changed between an ASCII and an UC16 string, the specialized | 1240 // If we changed between a Latin1 and a UC16 string, the specialized |
1241 // code cannot be used, and we need to restart regexp matching from | 1241 // code cannot be used, and we need to restart regexp matching from |
1242 // scratch (including, potentially, compiling a new version of the code). | 1242 // scratch (including, potentially, compiling a new version of the code). |
1243 return RETRY; | 1243 return RETRY; |
1244 } | 1244 } |
1245 | 1245 |
1246 // Otherwise, the content of the string might have moved. It must still | 1246 // Otherwise, the content of the string might have moved. It must still |
1247 // be a sequential or external string with the same content. | 1247 // be a sequential or external string with the same content. |
1248 // Update the start and end pointers in the stack frame to the current | 1248 // Update the start and end pointers in the stack frame to the current |
1249 // location (whether it has actually moved or not). | 1249 // location (whether it has actually moved or not). |
1250 DCHECK(StringShape(*subject_tmp).IsSequential() || | 1250 DCHECK(StringShape(*subject_tmp).IsSequential() || |
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1406 __ j(above, &no_stack_overflow); | 1406 __ j(above, &no_stack_overflow); |
1407 | 1407 |
1408 SafeCall(&stack_overflow_label_); | 1408 SafeCall(&stack_overflow_label_); |
1409 | 1409 |
1410 __ bind(&no_stack_overflow); | 1410 __ bind(&no_stack_overflow); |
1411 } | 1411 } |
1412 | 1412 |
1413 | 1413 |
1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset, | 1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset, |
1415 int characters) { | 1415 int characters) { |
1416 if (mode_ == ASCII) { | 1416 if (mode_ == LATIN1) { |
1417 if (characters == 4) { | 1417 if (characters == 4) { |
1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); | 1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
1419 } else if (characters == 2) { | 1419 } else if (characters == 2) { |
1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); | 1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
1421 } else { | 1421 } else { |
1422 DCHECK(characters == 1); | 1422 DCHECK(characters == 1); |
1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); | 1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
1424 } | 1424 } |
1425 } else { | 1425 } else { |
1426 DCHECK(mode_ == UC16); | 1426 DCHECK(mode_ == UC16); |
1427 if (characters == 2) { | 1427 if (characters == 2) { |
1428 __ movl(current_character(), | 1428 __ movl(current_character(), |
1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); | 1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); |
1430 } else { | 1430 } else { |
1431 DCHECK(characters == 1); | 1431 DCHECK(characters == 1); |
1432 __ movzxwl(current_character(), | 1432 __ movzxwl(current_character(), |
1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); | 1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); |
1434 } | 1434 } |
1435 } | 1435 } |
1436 } | 1436 } |
1437 | 1437 |
1438 #undef __ | 1438 #undef __ |
1439 | 1439 |
1440 #endif // V8_INTERPRETED_REGEXP | 1440 #endif // V8_INTERPRETED_REGEXP |
1441 | 1441 |
1442 }} // namespace v8::internal | 1442 }} // namespace v8::internal |
1443 | 1443 |
1444 #endif // V8_TARGET_ARCH_X64 | 1444 #endif // V8_TARGET_ARCH_X64 |
OLD | NEW |