| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/v8.h" | 5 #include "src/v8.h" |
| 6 | 6 |
| 7 #if V8_TARGET_ARCH_X64 | 7 #if V8_TARGET_ARCH_X64 |
| 8 | 8 |
| 9 #include "src/cpu-profiler.h" | 9 #include "src/cpu-profiler.h" |
| 10 #include "src/log.h" | 10 #include "src/log.h" |
| 11 #include "src/macro-assembler.h" | 11 #include "src/macro-assembler.h" |
| 12 #include "src/regexp-macro-assembler.h" | 12 #include "src/regexp-macro-assembler.h" |
| 13 #include "src/regexp-stack.h" | 13 #include "src/regexp-stack.h" |
| 14 #include "src/serialize.h" | 14 #include "src/serialize.h" |
| 15 #include "src/unicode.h" | 15 #include "src/unicode.h" |
| 16 #include "src/x64/regexp-macro-assembler-x64.h" | 16 #include "src/x64/regexp-macro-assembler-x64.h" |
| 17 | 17 |
| 18 namespace v8 { | 18 namespace v8 { |
| 19 namespace internal { | 19 namespace internal { |
| 20 | 20 |
| 21 #ifndef V8_INTERPRETED_REGEXP | 21 #ifndef V8_INTERPRETED_REGEXP |
| 22 | 22 |
| 23 /* | 23 /* |
| 24 * This assembler uses the following register assignment convention | 24 * This assembler uses the following register assignment convention |
| 25 * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded | 25 * - rdx : Currently loaded character(s) as Latin1 or UC16. Must be loaded |
| 26 * using LoadCurrentCharacter before using any of the dispatch methods. | 26 * using LoadCurrentCharacter before using any of the dispatch methods. |
| 27 * Temporarily stores the index of capture start after a matching pass | 27 * Temporarily stores the index of capture start after a matching pass |
| 28 * for a global regexp. | 28 * for a global regexp. |
| 29 * - rdi : Current position in input, as negative offset from end of string. | 29 * - rdi : Current position in input, as negative offset from end of string. |
| 30 * Please notice that this is the byte offset, not the character | 30 * Please notice that this is the byte offset, not the character |
| 31 * offset! Is always a 32-bit signed (negative) offset, but must be | 31 * offset! Is always a 32-bit signed (negative) offset, but must be |
| 32 * maintained sign-extended to 64 bits, since it is used as index. | 32 * maintained sign-extended to 64 bits, since it is used as index. |
| 33 * - rsi : End of input (points to byte after last character in input), | 33 * - rsi : End of input (points to byte after last character in input), |
| 34 * so that rsi+rdi points to the current character. | 34 * so that rsi+rdi points to the current character. |
| 35 * - rbp : Frame pointer. Used to access arguments, local variables and | 35 * - rbp : Frame pointer. Used to access arguments, local variables and |
| (...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 237 __ j(equal, &fallthrough); | 237 __ j(equal, &fallthrough); |
| 238 | 238 |
| 239 // ----------------------- | 239 // ----------------------- |
| 240 // rdx - Start of capture | 240 // rdx - Start of capture |
| 241 // rbx - length of capture | 241 // rbx - length of capture |
| 242 // Check that there are sufficient characters left in the input. | 242 // Check that there are sufficient characters left in the input. |
| 243 __ movl(rax, rdi); | 243 __ movl(rax, rdi); |
| 244 __ addl(rax, rbx); | 244 __ addl(rax, rbx); |
| 245 BranchOrBacktrack(greater, on_no_match); | 245 BranchOrBacktrack(greater, on_no_match); |
| 246 | 246 |
| 247 if (mode_ == ASCII) { | 247 if (mode_ == LATIN1) { |
| 248 Label loop_increment; | 248 Label loop_increment; |
| 249 if (on_no_match == NULL) { | 249 if (on_no_match == NULL) { |
| 250 on_no_match = &backtrack_label_; | 250 on_no_match = &backtrack_label_; |
| 251 } | 251 } |
| 252 | 252 |
| 253 __ leap(r9, Operand(rsi, rdx, times_1, 0)); | 253 __ leap(r9, Operand(rsi, rdx, times_1, 0)); |
| 254 __ leap(r11, Operand(rsi, rdi, times_1, 0)); | 254 __ leap(r11, Operand(rsi, rdi, times_1, 0)); |
| 255 __ addp(rbx, r9); // End of capture | 255 __ addp(rbx, r9); // End of capture |
| 256 // --------------------- | 256 // --------------------- |
| 257 // r11 - current input character address | 257 // r11 - current input character address |
| (...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 393 __ addp(rdx, rsi); // Start of capture. | 393 __ addp(rdx, rsi); // Start of capture. |
| 394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture | 394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture |
| 395 | 395 |
| 396 // ----------------------- | 396 // ----------------------- |
| 397 // rdx - current capture character address. | 397 // rdx - current capture character address. |
| 398 // rbx - current input character address. | 398 // rbx - current input character address. |
| 399 // r9 - end of capture (capture length after rdx). | 399 // r9 - end of capture (capture length after rdx). |
| 400 | 400 |
| 401 Label loop; | 401 Label loop; |
| 402 __ bind(&loop); | 402 __ bind(&loop); |
| 403 if (mode_ == ASCII) { | 403 if (mode_ == LATIN1) { |
| 404 __ movzxbl(rax, Operand(rdx, 0)); | 404 __ movzxbl(rax, Operand(rdx, 0)); |
| 405 __ cmpb(rax, Operand(rbx, 0)); | 405 __ cmpb(rax, Operand(rbx, 0)); |
| 406 } else { | 406 } else { |
| 407 DCHECK(mode_ == UC16); | 407 DCHECK(mode_ == UC16); |
| 408 __ movzxwl(rax, Operand(rdx, 0)); | 408 __ movzxwl(rax, Operand(rdx, 0)); |
| 409 __ cmpw(rax, Operand(rbx, 0)); | 409 __ cmpw(rax, Operand(rbx, 0)); |
| 410 } | 410 } |
| 411 BranchOrBacktrack(not_equal, on_no_match); | 411 BranchOrBacktrack(not_equal, on_no_match); |
| 412 // Increment pointers into capture and match string. | 412 // Increment pointers into capture and match string. |
| 413 __ addp(rbx, Immediate(char_size())); | 413 __ addp(rbx, Immediate(char_size())); |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 491 __ cmpl(rax, Immediate(to - from)); | 491 __ cmpl(rax, Immediate(to - from)); |
| 492 BranchOrBacktrack(above, on_not_in_range); | 492 BranchOrBacktrack(above, on_not_in_range); |
| 493 } | 493 } |
| 494 | 494 |
| 495 | 495 |
| 496 void RegExpMacroAssemblerX64::CheckBitInTable( | 496 void RegExpMacroAssemblerX64::CheckBitInTable( |
| 497 Handle<ByteArray> table, | 497 Handle<ByteArray> table, |
| 498 Label* on_bit_set) { | 498 Label* on_bit_set) { |
| 499 __ Move(rax, table); | 499 __ Move(rax, table); |
| 500 Register index = current_character(); | 500 Register index = current_character(); |
| 501 if (mode_ != ASCII || kTableMask != String::kMaxOneByteCharCode) { | 501 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { |
| 502 __ movp(rbx, current_character()); | 502 __ movp(rbx, current_character()); |
| 503 __ andp(rbx, Immediate(kTableMask)); | 503 __ andp(rbx, Immediate(kTableMask)); |
| 504 index = rbx; | 504 index = rbx; |
| 505 } | 505 } |
| 506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize), | 506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize), |
| 507 Immediate(0)); | 507 Immediate(0)); |
| 508 BranchOrBacktrack(not_equal, on_bit_set); | 508 BranchOrBacktrack(not_equal, on_bit_set); |
| 509 } | 509 } |
| 510 | 510 |
| 511 | 511 |
| 512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, | 512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, |
| 513 Label* on_no_match) { | 513 Label* on_no_match) { |
| 514 // Range checks (c in min..max) are generally implemented by an unsigned | 514 // Range checks (c in min..max) are generally implemented by an unsigned |
| 515 // (c - min) <= (max - min) check, using the sequence: | 515 // (c - min) <= (max - min) check, using the sequence: |
| 516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min)) | 516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min)) |
| 517 // cmp(rax, Immediate(max - min)) | 517 // cmp(rax, Immediate(max - min)) |
| 518 switch (type) { | 518 switch (type) { |
| 519 case 's': | 519 case 's': |
| 520 // Match space-characters | 520 // Match space-characters |
| 521 if (mode_ == ASCII) { | 521 if (mode_ == LATIN1) { |
| 522 // One byte space characters are '\t'..'\r', ' ' and \u00a0. | 522 // One byte space characters are '\t'..'\r', ' ' and \u00a0. |
| 523 Label success; | 523 Label success; |
| 524 __ cmpl(current_character(), Immediate(' ')); | 524 __ cmpl(current_character(), Immediate(' ')); |
| 525 __ j(equal, &success, Label::kNear); | 525 __ j(equal, &success, Label::kNear); |
| 526 // Check range 0x09..0x0d | 526 // Check range 0x09..0x0d |
| 527 __ leap(rax, Operand(current_character(), -'\t')); | 527 __ leap(rax, Operand(current_character(), -'\t')); |
| 528 __ cmpl(rax, Immediate('\r' - '\t')); | 528 __ cmpl(rax, Immediate('\r' - '\t')); |
| 529 __ j(below_equal, &success, Label::kNear); | 529 __ j(below_equal, &success, Label::kNear); |
| 530 // \u00a0 (NBSP). | 530 // \u00a0 (NBSP). |
| 531 __ cmpl(rax, Immediate(0x00a0 - '\t')); | 531 __ cmpl(rax, Immediate(0x00a0 - '\t')); |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 567 } | 567 } |
| 568 return true; | 568 return true; |
| 569 } | 569 } |
| 570 case 'n': { | 570 case 'n': { |
| 571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) | 571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) |
| 572 __ movl(rax, current_character()); | 572 __ movl(rax, current_character()); |
| 573 __ xorp(rax, Immediate(0x01)); | 573 __ xorp(rax, Immediate(0x01)); |
| 574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c | 574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c |
| 575 __ subl(rax, Immediate(0x0b)); | 575 __ subl(rax, Immediate(0x0b)); |
| 576 __ cmpl(rax, Immediate(0x0c - 0x0b)); | 576 __ cmpl(rax, Immediate(0x0c - 0x0b)); |
| 577 if (mode_ == ASCII) { | 577 if (mode_ == LATIN1) { |
| 578 BranchOrBacktrack(above, on_no_match); | 578 BranchOrBacktrack(above, on_no_match); |
| 579 } else { | 579 } else { |
| 580 Label done; | 580 Label done; |
| 581 BranchOrBacktrack(below_equal, &done); | 581 BranchOrBacktrack(below_equal, &done); |
| 582 // Compare original value to 0x2028 and 0x2029, using the already | 582 // Compare original value to 0x2028 and 0x2029, using the already |
| 583 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for | 583 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for |
| 584 // 0x201d (0x2028 - 0x0b) or 0x201e. | 584 // 0x201d (0x2028 - 0x0b) or 0x201e. |
| 585 __ subl(rax, Immediate(0x2028 - 0x0b)); | 585 __ subl(rax, Immediate(0x2028 - 0x0b)); |
| 586 __ cmpl(rax, Immediate(0x2029 - 0x2028)); | 586 __ cmpl(rax, Immediate(0x2029 - 0x2028)); |
| 587 BranchOrBacktrack(above, on_no_match); | 587 BranchOrBacktrack(above, on_no_match); |
| 588 __ bind(&done); | 588 __ bind(&done); |
| 589 } | 589 } |
| 590 return true; | 590 return true; |
| 591 } | 591 } |
| 592 case 'w': { | 592 case 'w': { |
| 593 if (mode_ != ASCII) { | 593 if (mode_ != LATIN1) { |
| 594 // Table is 128 entries, so all ASCII characters can be tested. | 594 // Table is 256 entries, so all Latin1 characters can be tested. |
| 595 __ cmpl(current_character(), Immediate('z')); | 595 __ cmpl(current_character(), Immediate('z')); |
| 596 BranchOrBacktrack(above, on_no_match); | 596 BranchOrBacktrack(above, on_no_match); |
| 597 } | 597 } |
| 598 __ Move(rbx, ExternalReference::re_word_character_map()); | 598 __ Move(rbx, ExternalReference::re_word_character_map()); |
| 599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. | 599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. |
| 600 __ testb(Operand(rbx, current_character(), times_1, 0), | 600 __ testb(Operand(rbx, current_character(), times_1, 0), |
| 601 current_character()); | 601 current_character()); |
| 602 BranchOrBacktrack(zero, on_no_match); | 602 BranchOrBacktrack(zero, on_no_match); |
| 603 return true; | 603 return true; |
| 604 } | 604 } |
| 605 case 'W': { | 605 case 'W': { |
| 606 Label done; | 606 Label done; |
| 607 if (mode_ != ASCII) { | 607 if (mode_ != LATIN1) { |
| 608 // Table is 128 entries, so all ASCII characters can be tested. | 608 // Table is 256 entries, so all Latin1 characters can be tested. |
| 609 __ cmpl(current_character(), Immediate('z')); | 609 __ cmpl(current_character(), Immediate('z')); |
| 610 __ j(above, &done); | 610 __ j(above, &done); |
| 611 } | 611 } |
| 612 __ Move(rbx, ExternalReference::re_word_character_map()); | 612 __ Move(rbx, ExternalReference::re_word_character_map()); |
| 613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. | 613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. |
| 614 __ testb(Operand(rbx, current_character(), times_1, 0), | 614 __ testb(Operand(rbx, current_character(), times_1, 0), |
| 615 current_character()); | 615 current_character()); |
| 616 BranchOrBacktrack(not_zero, on_no_match); | 616 BranchOrBacktrack(not_zero, on_no_match); |
| 617 if (mode_ != ASCII) { | 617 if (mode_ != LATIN1) { |
| 618 __ bind(&done); | 618 __ bind(&done); |
| 619 } | 619 } |
| 620 return true; | 620 return true; |
| 621 } | 621 } |
| 622 | 622 |
| 623 case '*': | 623 case '*': |
| 624 // Match any character. | 624 // Match any character. |
| 625 return true; | 625 return true; |
| 626 // No custom implementation (yet): s(UC16), S(UC16). | 626 // No custom implementation (yet): s(UC16), S(UC16). |
| 627 default: | 627 default: |
| (...skipping 570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1198 return RETRY; | 1198 return RETRY; |
| 1199 } | 1199 } |
| 1200 | 1200 |
| 1201 // Prepare for possible GC. | 1201 // Prepare for possible GC. |
| 1202 HandleScope handles(isolate); | 1202 HandleScope handles(isolate); |
| 1203 Handle<Code> code_handle(re_code); | 1203 Handle<Code> code_handle(re_code); |
| 1204 | 1204 |
| 1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString)); | 1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString)); |
| 1206 | 1206 |
| 1207 // Current string. | 1207 // Current string. |
| 1208 bool is_ascii = subject->IsOneByteRepresentationUnderneath(); | 1208 bool is_one_byte = subject->IsOneByteRepresentationUnderneath(); |
| 1209 | 1209 |
| 1210 DCHECK(re_code->instruction_start() <= *return_address); | 1210 DCHECK(re_code->instruction_start() <= *return_address); |
| 1211 DCHECK(*return_address <= | 1211 DCHECK(*return_address <= |
| 1212 re_code->instruction_start() + re_code->instruction_size()); | 1212 re_code->instruction_start() + re_code->instruction_size()); |
| 1213 | 1213 |
| 1214 Object* result = isolate->stack_guard()->HandleInterrupts(); | 1214 Object* result = isolate->stack_guard()->HandleInterrupts(); |
| 1215 | 1215 |
| 1216 if (*code_handle != re_code) { // Return address no longer valid | 1216 if (*code_handle != re_code) { // Return address no longer valid |
| 1217 intptr_t delta = code_handle->address() - re_code->address(); | 1217 intptr_t delta = code_handle->address() - re_code->address(); |
| 1218 // Overwrite the return address on the stack. | 1218 // Overwrite the return address on the stack. |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1229 // Extract the underlying string and the slice offset. | 1229 // Extract the underlying string and the slice offset. |
| 1230 if (StringShape(*subject_tmp).IsCons()) { | 1230 if (StringShape(*subject_tmp).IsCons()) { |
| 1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first()); | 1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first()); |
| 1232 } else if (StringShape(*subject_tmp).IsSliced()) { | 1232 } else if (StringShape(*subject_tmp).IsSliced()) { |
| 1233 SlicedString* slice = SlicedString::cast(*subject_tmp); | 1233 SlicedString* slice = SlicedString::cast(*subject_tmp); |
| 1234 subject_tmp = Handle<String>(slice->parent()); | 1234 subject_tmp = Handle<String>(slice->parent()); |
| 1235 slice_offset = slice->offset(); | 1235 slice_offset = slice->offset(); |
| 1236 } | 1236 } |
| 1237 | 1237 |
| 1238 // String might have changed. | 1238 // String might have changed. |
| 1239 if (subject_tmp->IsOneByteRepresentation() != is_ascii) { | 1239 if (subject_tmp->IsOneByteRepresentation() != is_one_byte) { |
| 1240 // If we changed between an ASCII and an UC16 string, the specialized | 1240 // If we changed between a Latin1 and a UC16 string, the specialized |
| 1241 // code cannot be used, and we need to restart regexp matching from | 1241 // code cannot be used, and we need to restart regexp matching from |
| 1242 // scratch (including, potentially, compiling a new version of the code). | 1242 // scratch (including, potentially, compiling a new version of the code). |
| 1243 return RETRY; | 1243 return RETRY; |
| 1244 } | 1244 } |
| 1245 | 1245 |
| 1246 // Otherwise, the content of the string might have moved. It must still | 1246 // Otherwise, the content of the string might have moved. It must still |
| 1247 // be a sequential or external string with the same content. | 1247 // be a sequential or external string with the same content. |
| 1248 // Update the start and end pointers in the stack frame to the current | 1248 // Update the start and end pointers in the stack frame to the current |
| 1249 // location (whether it has actually moved or not). | 1249 // location (whether it has actually moved or not). |
| 1250 DCHECK(StringShape(*subject_tmp).IsSequential() || | 1250 DCHECK(StringShape(*subject_tmp).IsSequential() || |
| (...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1406 __ j(above, &no_stack_overflow); | 1406 __ j(above, &no_stack_overflow); |
| 1407 | 1407 |
| 1408 SafeCall(&stack_overflow_label_); | 1408 SafeCall(&stack_overflow_label_); |
| 1409 | 1409 |
| 1410 __ bind(&no_stack_overflow); | 1410 __ bind(&no_stack_overflow); |
| 1411 } | 1411 } |
| 1412 | 1412 |
| 1413 | 1413 |
| 1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset, | 1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset, |
| 1415 int characters) { | 1415 int characters) { |
| 1416 if (mode_ == ASCII) { | 1416 if (mode_ == LATIN1) { |
| 1417 if (characters == 4) { | 1417 if (characters == 4) { |
| 1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); | 1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
| 1419 } else if (characters == 2) { | 1419 } else if (characters == 2) { |
| 1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); | 1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
| 1421 } else { | 1421 } else { |
| 1422 DCHECK(characters == 1); | 1422 DCHECK(characters == 1); |
| 1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); | 1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
| 1424 } | 1424 } |
| 1425 } else { | 1425 } else { |
| 1426 DCHECK(mode_ == UC16); | 1426 DCHECK(mode_ == UC16); |
| 1427 if (characters == 2) { | 1427 if (characters == 2) { |
| 1428 __ movl(current_character(), | 1428 __ movl(current_character(), |
| 1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); | 1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); |
| 1430 } else { | 1430 } else { |
| 1431 DCHECK(characters == 1); | 1431 DCHECK(characters == 1); |
| 1432 __ movzxwl(current_character(), | 1432 __ movzxwl(current_character(), |
| 1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); | 1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); |
| 1434 } | 1434 } |
| 1435 } | 1435 } |
| 1436 } | 1436 } |
| 1437 | 1437 |
| 1438 #undef __ | 1438 #undef __ |
| 1439 | 1439 |
| 1440 #endif // V8_INTERPRETED_REGEXP | 1440 #endif // V8_INTERPRETED_REGEXP |
| 1441 | 1441 |
| 1442 }} // namespace v8::internal | 1442 }} // namespace v8::internal |
| 1443 | 1443 |
| 1444 #endif // V8_TARGET_ARCH_X64 | 1444 #endif // V8_TARGET_ARCH_X64 |
| OLD | NEW |