Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(59)

Side by Side Diff: src/x64/regexp-macro-assembler-x64.cc

Issue 559913002: Rename ascii to one-byte where applicable. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #if V8_TARGET_ARCH_X64 7 #if V8_TARGET_ARCH_X64
8 8
9 #include "src/cpu-profiler.h" 9 #include "src/cpu-profiler.h"
10 #include "src/log.h" 10 #include "src/log.h"
11 #include "src/macro-assembler.h" 11 #include "src/macro-assembler.h"
12 #include "src/regexp-macro-assembler.h" 12 #include "src/regexp-macro-assembler.h"
13 #include "src/regexp-stack.h" 13 #include "src/regexp-stack.h"
14 #include "src/serialize.h" 14 #include "src/serialize.h"
15 #include "src/unicode.h" 15 #include "src/unicode.h"
16 #include "src/x64/regexp-macro-assembler-x64.h" 16 #include "src/x64/regexp-macro-assembler-x64.h"
17 17
18 namespace v8 { 18 namespace v8 {
19 namespace internal { 19 namespace internal {
20 20
21 #ifndef V8_INTERPRETED_REGEXP 21 #ifndef V8_INTERPRETED_REGEXP
22 22
23 /* 23 /*
24 * This assembler uses the following register assignment convention 24 * This assembler uses the following register assignment convention
25 * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded 25 * - rdx : Currently loaded character(s) as Latin1 or UC16. Must be loaded
26 * using LoadCurrentCharacter before using any of the dispatch methods. 26 * using LoadCurrentCharacter before using any of the dispatch methods.
27 * Temporarily stores the index of capture start after a matching pass 27 * Temporarily stores the index of capture start after a matching pass
28 * for a global regexp. 28 * for a global regexp.
29 * - rdi : Current position in input, as negative offset from end of string. 29 * - rdi : Current position in input, as negative offset from end of string.
30 * Please notice that this is the byte offset, not the character 30 * Please notice that this is the byte offset, not the character
31 * offset! Is always a 32-bit signed (negative) offset, but must be 31 * offset! Is always a 32-bit signed (negative) offset, but must be
32 * maintained sign-extended to 64 bits, since it is used as index. 32 * maintained sign-extended to 64 bits, since it is used as index.
33 * - rsi : End of input (points to byte after last character in input), 33 * - rsi : End of input (points to byte after last character in input),
34 * so that rsi+rdi points to the current character. 34 * so that rsi+rdi points to the current character.
35 * - rbp : Frame pointer. Used to access arguments, local variables and 35 * - rbp : Frame pointer. Used to access arguments, local variables and
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after
237 __ j(equal, &fallthrough); 237 __ j(equal, &fallthrough);
238 238
239 // ----------------------- 239 // -----------------------
240 // rdx - Start of capture 240 // rdx - Start of capture
241 // rbx - length of capture 241 // rbx - length of capture
242 // Check that there are sufficient characters left in the input. 242 // Check that there are sufficient characters left in the input.
243 __ movl(rax, rdi); 243 __ movl(rax, rdi);
244 __ addl(rax, rbx); 244 __ addl(rax, rbx);
245 BranchOrBacktrack(greater, on_no_match); 245 BranchOrBacktrack(greater, on_no_match);
246 246
247 if (mode_ == ASCII) { 247 if (mode_ == LATIN1) {
248 Label loop_increment; 248 Label loop_increment;
249 if (on_no_match == NULL) { 249 if (on_no_match == NULL) {
250 on_no_match = &backtrack_label_; 250 on_no_match = &backtrack_label_;
251 } 251 }
252 252
253 __ leap(r9, Operand(rsi, rdx, times_1, 0)); 253 __ leap(r9, Operand(rsi, rdx, times_1, 0));
254 __ leap(r11, Operand(rsi, rdi, times_1, 0)); 254 __ leap(r11, Operand(rsi, rdi, times_1, 0));
255 __ addp(rbx, r9); // End of capture 255 __ addp(rbx, r9); // End of capture
256 // --------------------- 256 // ---------------------
257 // r11 - current input character address 257 // r11 - current input character address
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
393 __ addp(rdx, rsi); // Start of capture. 393 __ addp(rdx, rsi); // Start of capture.
394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture 394 __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture
395 395
396 // ----------------------- 396 // -----------------------
397 // rdx - current capture character address. 397 // rdx - current capture character address.
398 // rbx - current input character address. 398 // rbx - current input character address.
399 // r9 - end of input to match (capture length after rbx). 399 // r9 - end of input to match (capture length after rbx).
400 400
401 Label loop; 401 Label loop;
402 __ bind(&loop); 402 __ bind(&loop);
403 if (mode_ == ASCII) { 403 if (mode_ == LATIN1) {
404 __ movzxbl(rax, Operand(rdx, 0)); 404 __ movzxbl(rax, Operand(rdx, 0));
405 __ cmpb(rax, Operand(rbx, 0)); 405 __ cmpb(rax, Operand(rbx, 0));
406 } else { 406 } else {
407 DCHECK(mode_ == UC16); 407 DCHECK(mode_ == UC16);
408 __ movzxwl(rax, Operand(rdx, 0)); 408 __ movzxwl(rax, Operand(rdx, 0));
409 __ cmpw(rax, Operand(rbx, 0)); 409 __ cmpw(rax, Operand(rbx, 0));
410 } 410 }
411 BranchOrBacktrack(not_equal, on_no_match); 411 BranchOrBacktrack(not_equal, on_no_match);
412 // Increment pointers into capture and match string. 412 // Increment pointers into capture and match string.
413 __ addp(rbx, Immediate(char_size())); 413 __ addp(rbx, Immediate(char_size()));
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
491 __ cmpl(rax, Immediate(to - from)); 491 __ cmpl(rax, Immediate(to - from));
492 BranchOrBacktrack(above, on_not_in_range); 492 BranchOrBacktrack(above, on_not_in_range);
493 } 493 }
494 494
495 495
496 void RegExpMacroAssemblerX64::CheckBitInTable( 496 void RegExpMacroAssemblerX64::CheckBitInTable(
497 Handle<ByteArray> table, 497 Handle<ByteArray> table,
498 Label* on_bit_set) { 498 Label* on_bit_set) {
499 __ Move(rax, table); 499 __ Move(rax, table);
500 Register index = current_character(); 500 Register index = current_character();
501 if (mode_ != ASCII || kTableMask != String::kMaxOneByteCharCode) { 501 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
502 __ movp(rbx, current_character()); 502 __ movp(rbx, current_character());
503 __ andp(rbx, Immediate(kTableMask)); 503 __ andp(rbx, Immediate(kTableMask));
504 index = rbx; 504 index = rbx;
505 } 505 }
506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize), 506 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize),
507 Immediate(0)); 507 Immediate(0));
508 BranchOrBacktrack(not_equal, on_bit_set); 508 BranchOrBacktrack(not_equal, on_bit_set);
509 } 509 }
510 510
511 511
512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, 512 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
513 Label* on_no_match) { 513 Label* on_no_match) {
514 // Range checks (c in min..max) are generally implemented by an unsigned 514 // Range checks (c in min..max) are generally implemented by an unsigned
515 // (c - min) <= (max - min) check, using the sequence: 515 // (c - min) <= (max - min) check, using the sequence:
516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min)) 516 // leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
517 // cmp(rax, Immediate(max - min)) 517 // cmp(rax, Immediate(max - min))
518 switch (type) { 518 switch (type) {
519 case 's': 519 case 's':
520 // Match space-characters 520 // Match space-characters
521 if (mode_ == ASCII) { 521 if (mode_ == LATIN1) {
522 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 522 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
523 Label success; 523 Label success;
524 __ cmpl(current_character(), Immediate(' ')); 524 __ cmpl(current_character(), Immediate(' '));
525 __ j(equal, &success, Label::kNear); 525 __ j(equal, &success, Label::kNear);
526 // Check range 0x09..0x0d 526 // Check range 0x09..0x0d
527 __ leap(rax, Operand(current_character(), -'\t')); 527 __ leap(rax, Operand(current_character(), -'\t'));
528 __ cmpl(rax, Immediate('\r' - '\t')); 528 __ cmpl(rax, Immediate('\r' - '\t'));
529 __ j(below_equal, &success, Label::kNear); 529 __ j(below_equal, &success, Label::kNear);
530 // \u00a0 (NBSP). 530 // \u00a0 (NBSP).
531 __ cmpl(rax, Immediate(0x00a0 - '\t')); 531 __ cmpl(rax, Immediate(0x00a0 - '\t'));
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
567 } 567 }
568 return true; 568 return true;
569 } 569 }
570 case 'n': { 570 case 'n': {
571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 571 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
572 __ movl(rax, current_character()); 572 __ movl(rax, current_character());
573 __ xorp(rax, Immediate(0x01)); 573 __ xorp(rax, Immediate(0x01));
574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 574 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
575 __ subl(rax, Immediate(0x0b)); 575 __ subl(rax, Immediate(0x0b));
576 __ cmpl(rax, Immediate(0x0c - 0x0b)); 576 __ cmpl(rax, Immediate(0x0c - 0x0b));
577 if (mode_ == ASCII) { 577 if (mode_ == LATIN1) {
578 BranchOrBacktrack(above, on_no_match); 578 BranchOrBacktrack(above, on_no_match);
579 } else { 579 } else {
580 Label done; 580 Label done;
581 BranchOrBacktrack(below_equal, &done); 581 BranchOrBacktrack(below_equal, &done);
582 // Compare original value to 0x2028 and 0x2029, using the already 582 // Compare original value to 0x2028 and 0x2029, using the already
583 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 583 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
584 // 0x201d (0x2028 - 0x0b) or 0x201e. 584 // 0x201d (0x2028 - 0x0b) or 0x201e.
585 __ subl(rax, Immediate(0x2028 - 0x0b)); 585 __ subl(rax, Immediate(0x2028 - 0x0b));
586 __ cmpl(rax, Immediate(0x2029 - 0x2028)); 586 __ cmpl(rax, Immediate(0x2029 - 0x2028));
587 BranchOrBacktrack(above, on_no_match); 587 BranchOrBacktrack(above, on_no_match);
588 __ bind(&done); 588 __ bind(&done);
589 } 589 }
590 return true; 590 return true;
591 } 591 }
592 case 'w': { 592 case 'w': {
593 if (mode_ != ASCII) { 593 if (mode_ != LATIN1) {
594 // Table is 128 entries, so all ASCII characters can be tested. 594 // Table is 256 entries, so all Latin1 characters can be tested.
595 __ cmpl(current_character(), Immediate('z')); 595 __ cmpl(current_character(), Immediate('z'));
596 BranchOrBacktrack(above, on_no_match); 596 BranchOrBacktrack(above, on_no_match);
597 } 597 }
598 __ Move(rbx, ExternalReference::re_word_character_map()); 598 __ Move(rbx, ExternalReference::re_word_character_map());
599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 599 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
600 __ testb(Operand(rbx, current_character(), times_1, 0), 600 __ testb(Operand(rbx, current_character(), times_1, 0),
601 current_character()); 601 current_character());
602 BranchOrBacktrack(zero, on_no_match); 602 BranchOrBacktrack(zero, on_no_match);
603 return true; 603 return true;
604 } 604 }
605 case 'W': { 605 case 'W': {
606 Label done; 606 Label done;
607 if (mode_ != ASCII) { 607 if (mode_ != LATIN1) {
608 // Table is 128 entries, so all ASCII characters can be tested. 608 // Table is 256 entries, so all Latin1 characters can be tested.
609 __ cmpl(current_character(), Immediate('z')); 609 __ cmpl(current_character(), Immediate('z'));
610 __ j(above, &done); 610 __ j(above, &done);
611 } 611 }
612 __ Move(rbx, ExternalReference::re_word_character_map()); 612 __ Move(rbx, ExternalReference::re_word_character_map());
613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char. 613 DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
614 __ testb(Operand(rbx, current_character(), times_1, 0), 614 __ testb(Operand(rbx, current_character(), times_1, 0),
615 current_character()); 615 current_character());
616 BranchOrBacktrack(not_zero, on_no_match); 616 BranchOrBacktrack(not_zero, on_no_match);
617 if (mode_ != ASCII) { 617 if (mode_ != LATIN1) {
618 __ bind(&done); 618 __ bind(&done);
619 } 619 }
620 return true; 620 return true;
621 } 621 }
622 622
623 case '*': 623 case '*':
624 // Match any character. 624 // Match any character.
625 return true; 625 return true;
626 // No custom implementation (yet): s(UC16), S(UC16). 626 // No custom implementation (yet): s(UC16), S(UC16).
627 default: 627 default:
(...skipping 570 matching lines...) Expand 10 before | Expand all | Expand 10 after
1198 return RETRY; 1198 return RETRY;
1199 } 1199 }
1200 1200
1201 // Prepare for possible GC. 1201 // Prepare for possible GC.
1202 HandleScope handles(isolate); 1202 HandleScope handles(isolate);
1203 Handle<Code> code_handle(re_code); 1203 Handle<Code> code_handle(re_code);
1204 1204
1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString)); 1205 Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
1206 1206
1207 // Current string. 1207 // Current string.
1208 bool is_ascii = subject->IsOneByteRepresentationUnderneath(); 1208 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
1209 1209
1210 DCHECK(re_code->instruction_start() <= *return_address); 1210 DCHECK(re_code->instruction_start() <= *return_address);
1211 DCHECK(*return_address <= 1211 DCHECK(*return_address <=
1212 re_code->instruction_start() + re_code->instruction_size()); 1212 re_code->instruction_start() + re_code->instruction_size());
1213 1213
1214 Object* result = isolate->stack_guard()->HandleInterrupts(); 1214 Object* result = isolate->stack_guard()->HandleInterrupts();
1215 1215
1216 if (*code_handle != re_code) { // Return address no longer valid 1216 if (*code_handle != re_code) { // Return address no longer valid
1217 intptr_t delta = code_handle->address() - re_code->address(); 1217 intptr_t delta = code_handle->address() - re_code->address();
1218 // Overwrite the return address on the stack. 1218 // Overwrite the return address on the stack.
(...skipping 10 matching lines...) Expand all
1229 // Extract the underlying string and the slice offset. 1229 // Extract the underlying string and the slice offset.
1230 if (StringShape(*subject_tmp).IsCons()) { 1230 if (StringShape(*subject_tmp).IsCons()) {
1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first()); 1231 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
1232 } else if (StringShape(*subject_tmp).IsSliced()) { 1232 } else if (StringShape(*subject_tmp).IsSliced()) {
1233 SlicedString* slice = SlicedString::cast(*subject_tmp); 1233 SlicedString* slice = SlicedString::cast(*subject_tmp);
1234 subject_tmp = Handle<String>(slice->parent()); 1234 subject_tmp = Handle<String>(slice->parent());
1235 slice_offset = slice->offset(); 1235 slice_offset = slice->offset();
1236 } 1236 }
1237 1237
1238 // String might have changed. 1238 // String might have changed.
1239 if (subject_tmp->IsOneByteRepresentation() != is_ascii) { 1239 if (subject_tmp->IsOneByteRepresentation() != is_one_byte) {
1240 // If we changed between an ASCII and an UC16 string, the specialized 1240 // If we changed between a Latin1 and a UC16 string, the specialized
1241 // code cannot be used, and we need to restart regexp matching from 1241 // code cannot be used, and we need to restart regexp matching from
1242 // scratch (including, potentially, compiling a new version of the code). 1242 // scratch (including, potentially, compiling a new version of the code).
1243 return RETRY; 1243 return RETRY;
1244 } 1244 }
1245 1245
1246 // Otherwise, the content of the string might have moved. It must still 1246 // Otherwise, the content of the string might have moved. It must still
1247 // be a sequential or external string with the same content. 1247 // be a sequential or external string with the same content.
1248 // Update the start and end pointers in the stack frame to the current 1248 // Update the start and end pointers in the stack frame to the current
1249 // location (whether it has actually moved or not). 1249 // location (whether it has actually moved or not).
1250 DCHECK(StringShape(*subject_tmp).IsSequential() || 1250 DCHECK(StringShape(*subject_tmp).IsSequential() ||
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
1406 __ j(above, &no_stack_overflow); 1406 __ j(above, &no_stack_overflow);
1407 1407
1408 SafeCall(&stack_overflow_label_); 1408 SafeCall(&stack_overflow_label_);
1409 1409
1410 __ bind(&no_stack_overflow); 1410 __ bind(&no_stack_overflow);
1411 } 1411 }
1412 1412
1413 1413
1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset, 1414 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
1415 int characters) { 1415 int characters) {
1416 if (mode_ == ASCII) { 1416 if (mode_ == LATIN1) {
1417 if (characters == 4) { 1417 if (characters == 4) {
1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); 1418 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1419 } else if (characters == 2) { 1419 } else if (characters == 2) {
1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); 1420 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1421 } else { 1421 } else {
1422 DCHECK(characters == 1); 1422 DCHECK(characters == 1);
1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); 1423 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1424 } 1424 }
1425 } else { 1425 } else {
1426 DCHECK(mode_ == UC16); 1426 DCHECK(mode_ == UC16);
1427 if (characters == 2) { 1427 if (characters == 2) {
1428 __ movl(current_character(), 1428 __ movl(current_character(),
1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); 1429 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
1430 } else { 1430 } else {
1431 DCHECK(characters == 1); 1431 DCHECK(characters == 1);
1432 __ movzxwl(current_character(), 1432 __ movzxwl(current_character(),
1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); 1433 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
1434 } 1434 }
1435 } 1435 }
1436 } 1436 }
1437 1437
1438 #undef __ 1438 #undef __
1439 1439
1440 #endif // V8_INTERPRETED_REGEXP 1440 #endif // V8_INTERPRETED_REGEXP
1441 1441
1442 }} // namespace v8::internal 1442 }} // namespace v8::internal
1443 1443
1444 #endif // V8_TARGET_ARCH_X64 1444 #endif // V8_TARGET_ARCH_X64
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698