Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: src/arm64/macro-assembler-arm64.cc

Issue 2812573003: Reland "ARM64: Add NEON support" (Closed)
Patch Set: Add trace directory to gitignore Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm64/macro-assembler-arm64.h ('k') | src/arm64/macro-assembler-arm64-inl.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #if V8_TARGET_ARCH_ARM64 5 #if V8_TARGET_ARCH_ARM64
6 6
7 #include "src/arm64/frames-arm64.h" 7 #include "src/arm64/frames-arm64.h"
8 #include "src/assembler.h" 8 #include "src/assembler.h"
9 #include "src/base/bits.h" 9 #include "src/base/bits.h"
10 #include "src/base/division-by-constant.h" 10 #include "src/base/division-by-constant.h"
(...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 dst = rd; 288 dst = rd;
289 } 289 }
290 290
291 // Copy the result to the system stack pointer. 291 // Copy the result to the system stack pointer.
292 if (!dst.Is(rd)) { 292 if (!dst.Is(rd)) {
293 DCHECK(rd.IsSP()); 293 DCHECK(rd.IsSP());
294 Assembler::mov(rd, dst); 294 Assembler::mov(rd, dst);
295 } 295 }
296 } 296 }
297 297
298 void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
299 DCHECK(is_uint16(imm));
300 int byte1 = (imm & 0xff);
301 int byte2 = ((imm >> 8) & 0xff);
302 if (byte1 == byte2) {
303 movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
304 } else if (byte1 == 0) {
305 movi(vd, byte2, LSL, 8);
306 } else if (byte2 == 0) {
307 movi(vd, byte1);
308 } else if (byte1 == 0xff) {
309 mvni(vd, ~byte2 & 0xff, LSL, 8);
310 } else if (byte2 == 0xff) {
311 mvni(vd, ~byte1 & 0xff);
312 } else {
313 UseScratchRegisterScope temps(this);
314 Register temp = temps.AcquireW();
315 movz(temp, imm);
316 dup(vd, temp);
317 }
318 }
319
320 void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
321 DCHECK(is_uint32(imm));
322
323 uint8_t bytes[sizeof(imm)];
324 memcpy(bytes, &imm, sizeof(imm));
325
326 // All bytes are either 0x00 or 0xff.
327 {
328 bool all0orff = true;
329 for (int i = 0; i < 4; ++i) {
330 if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
331 all0orff = false;
332 break;
333 }
334 }
335
336 if (all0orff == true) {
337 movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
338 return;
339 }
340 }
341
342 // Of the 4 bytes, only one byte is non-zero.
343 for (int i = 0; i < 4; i++) {
344 if ((imm & (0xff << (i * 8))) == imm) {
345 movi(vd, bytes[i], LSL, i * 8);
346 return;
347 }
348 }
349
350 // Of the 4 bytes, only one byte is not 0xff.
351 for (int i = 0; i < 4; i++) {
352 uint32_t mask = ~(0xff << (i * 8));
353 if ((imm & mask) == mask) {
354 mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
355 return;
356 }
357 }
358
359 // Immediate is of the form 0x00MMFFFF.
360 if ((imm & 0xff00ffff) == 0x0000ffff) {
361 movi(vd, bytes[2], MSL, 16);
362 return;
363 }
364
365 // Immediate is of the form 0x0000MMFF.
366 if ((imm & 0xffff00ff) == 0x000000ff) {
367 movi(vd, bytes[1], MSL, 8);
368 return;
369 }
370
371 // Immediate is of the form 0xFFMM0000.
372 if ((imm & 0xff00ffff) == 0xff000000) {
373 mvni(vd, ~bytes[2] & 0xff, MSL, 16);
374 return;
375 }
376 // Immediate is of the form 0xFFFFMM00.
377 if ((imm & 0xffff00ff) == 0xffff0000) {
378 mvni(vd, ~bytes[1] & 0xff, MSL, 8);
379 return;
380 }
381
382 // Top and bottom 16-bits are equal.
383 if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
384 Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
385 return;
386 }
387
388 // Default case.
389 {
390 UseScratchRegisterScope temps(this);
391 Register temp = temps.AcquireW();
392 Mov(temp, imm);
393 dup(vd, temp);
394 }
395 }
396
397 void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
398 // All bytes are either 0x00 or 0xff.
399 {
400 bool all0orff = true;
401 for (int i = 0; i < 8; ++i) {
402 int byteval = (imm >> (i * 8)) & 0xff;
403 if (byteval != 0 && byteval != 0xff) {
404 all0orff = false;
405 break;
406 }
407 }
408 if (all0orff == true) {
409 movi(vd, imm);
410 return;
411 }
412 }
413
414 // Top and bottom 32-bits are equal.
415 if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
416 Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
417 return;
418 }
419
420 // Default case.
421 {
422 UseScratchRegisterScope temps(this);
423 Register temp = temps.AcquireX();
424 Mov(temp, imm);
425 if (vd.Is1D()) {
426 mov(vd.D(), 0, temp);
427 } else {
428 dup(vd.V2D(), temp);
429 }
430 }
431 }
432
433 void MacroAssembler::Movi(const VRegister& vd, uint64_t imm, Shift shift,
434 int shift_amount) {
435 DCHECK(allow_macro_instructions_);
436 if (shift_amount != 0 || shift != LSL) {
437 movi(vd, imm, shift, shift_amount);
438 } else if (vd.Is8B() || vd.Is16B()) {
439 // 8-bit immediate.
440 DCHECK(is_uint8(imm));
441 movi(vd, imm);
442 } else if (vd.Is4H() || vd.Is8H()) {
443 // 16-bit immediate.
444 Movi16bitHelper(vd, imm);
445 } else if (vd.Is2S() || vd.Is4S()) {
446 // 32-bit immediate.
447 Movi32bitHelper(vd, imm);
448 } else {
449 // 64-bit immediate.
450 Movi64bitHelper(vd, imm);
451 }
452 }
453
454 void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
455 // TODO(all): Move 128-bit values in a more efficient way.
456 DCHECK(vd.Is128Bits());
457 UseScratchRegisterScope temps(this);
458 Movi(vd.V2D(), lo);
459 Register temp = temps.AcquireX();
460 Mov(temp, hi);
461 Ins(vd.V2D(), 1, temp);
462 }
298 463
299 void MacroAssembler::Mvn(const Register& rd, const Operand& operand) { 464 void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
300 DCHECK(allow_macro_instructions_); 465 DCHECK(allow_macro_instructions_);
301 466
302 if (operand.NeedsRelocation(this)) { 467 if (operand.NeedsRelocation(this)) {
303 Ldr(rd, operand.immediate()); 468 Ldr(rd, operand.immediate());
304 mvn(rd, rd); 469 mvn(rd, rd);
305 470
306 } else if (operand.IsImmediate()) { 471 } else if (operand.IsImmediate()) {
307 // Call the macro assembler for generic immediates. 472 // Call the macro assembler for generic immediates.
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
559 // The addressing mode is directly supported by the instruction. 724 // The addressing mode is directly supported by the instruction.
560 AddSubWithCarry(rd, rn, operand, S, op); 725 AddSubWithCarry(rd, rn, operand, S, op);
561 } 726 }
562 } 727 }
563 728
564 729
565 void MacroAssembler::LoadStoreMacro(const CPURegister& rt, 730 void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
566 const MemOperand& addr, 731 const MemOperand& addr,
567 LoadStoreOp op) { 732 LoadStoreOp op) {
568 int64_t offset = addr.offset(); 733 int64_t offset = addr.offset();
569 LSDataSize size = CalcLSDataSize(op); 734 unsigned size = CalcLSDataSize(op);
570 735
571 // Check if an immediate offset fits in the immediate field of the 736 // Check if an immediate offset fits in the immediate field of the
572 // appropriate instruction. If not, emit two instructions to perform 737 // appropriate instruction. If not, emit two instructions to perform
573 // the operation. 738 // the operation.
574 if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, size) && 739 if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, size) &&
575 !IsImmLSUnscaled(offset)) { 740 !IsImmLSUnscaled(offset)) {
576 // Immediate offset that can't be encoded using unsigned or unscaled 741 // Immediate offset that can't be encoded using unsigned or unscaled
577 // addressing modes. 742 // addressing modes.
578 UseScratchRegisterScope temps(this); 743 UseScratchRegisterScope temps(this);
579 Register temp = temps.AcquireSameSizeAs(addr.base()); 744 Register temp = temps.AcquireSameSizeAs(addr.base());
(...skipping 14 matching lines...) Expand all
594 } 759 }
595 760
596 void MacroAssembler::LoadStorePairMacro(const CPURegister& rt, 761 void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
597 const CPURegister& rt2, 762 const CPURegister& rt2,
598 const MemOperand& addr, 763 const MemOperand& addr,
599 LoadStorePairOp op) { 764 LoadStorePairOp op) {
600 // TODO(all): Should we support register offset for load-store-pair? 765 // TODO(all): Should we support register offset for load-store-pair?
601 DCHECK(!addr.IsRegisterOffset()); 766 DCHECK(!addr.IsRegisterOffset());
602 767
603 int64_t offset = addr.offset(); 768 int64_t offset = addr.offset();
604 LSDataSize size = CalcLSPairDataSize(op); 769 unsigned size = CalcLSPairDataSize(op);
605 770
606 // Check if the offset fits in the immediate field of the appropriate 771 // Check if the offset fits in the immediate field of the appropriate
607 // instruction. If not, emit two instructions to perform the operation. 772 // instruction. If not, emit two instructions to perform the operation.
608 if (IsImmLSPair(offset, size)) { 773 if (IsImmLSPair(offset, size)) {
609 // Encodable in one load/store pair instruction. 774 // Encodable in one load/store pair instruction.
610 LoadStorePair(rt, rt2, addr, op); 775 LoadStorePair(rt, rt2, addr, op);
611 } else { 776 } else {
612 Register base = addr.base(); 777 Register base = addr.base();
613 if (addr.IsImmediateOffset()) { 778 if (addr.IsImmediateOffset()) {
614 UseScratchRegisterScope temps(this); 779 UseScratchRegisterScope temps(this);
(...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after
922 DCHECK(dst0.IsValid()); 1087 DCHECK(dst0.IsValid());
923 1088
924 int count = 5 + dst5.IsValid() + dst6.IsValid() + dst7.IsValid(); 1089 int count = 5 + dst5.IsValid() + dst6.IsValid() + dst7.IsValid();
925 int size = dst0.SizeInBytes(); 1090 int size = dst0.SizeInBytes();
926 1091
927 PopHelper(4, size, dst0, dst1, dst2, dst3); 1092 PopHelper(4, size, dst0, dst1, dst2, dst3);
928 PopHelper(count - 4, size, dst4, dst5, dst6, dst7); 1093 PopHelper(count - 4, size, dst4, dst5, dst6, dst7);
929 PopPostamble(count, size); 1094 PopPostamble(count, size);
930 } 1095 }
931 1096
932 1097 void MacroAssembler::Push(const Register& src0, const VRegister& src1) {
933 void MacroAssembler::Push(const Register& src0, const FPRegister& src1) {
934 int size = src0.SizeInBytes() + src1.SizeInBytes(); 1098 int size = src0.SizeInBytes() + src1.SizeInBytes();
935 1099
936 PushPreamble(size); 1100 PushPreamble(size);
937 // Reserve room for src0 and push src1. 1101 // Reserve room for src0 and push src1.
938 str(src1, MemOperand(StackPointer(), -size, PreIndex)); 1102 str(src1, MemOperand(StackPointer(), -size, PreIndex));
939 // Fill the gap with src0. 1103 // Fill the gap with src0.
940 str(src0, MemOperand(StackPointer(), src1.SizeInBytes())); 1104 str(src0, MemOperand(StackPointer(), src1.SizeInBytes()));
941 } 1105 }
942 1106
943 1107
(...skipping 446 matching lines...) Expand 10 before | Expand all | Expand 10 after
1390 Tst(fpcr, RMode_mask); 1554 Tst(fpcr, RMode_mask);
1391 B(eq, &done); 1555 B(eq, &done);
1392 1556
1393 Bind(&unexpected_mode); 1557 Bind(&unexpected_mode);
1394 Abort(kUnexpectedFPCRMode); 1558 Abort(kUnexpectedFPCRMode);
1395 1559
1396 Bind(&done); 1560 Bind(&done);
1397 } 1561 }
1398 } 1562 }
1399 1563
1400 1564 void MacroAssembler::CanonicalizeNaN(const VRegister& dst,
1401 void MacroAssembler::CanonicalizeNaN(const FPRegister& dst, 1565 const VRegister& src) {
1402 const FPRegister& src) {
1403 AssertFPCRState(); 1566 AssertFPCRState();
1404 1567
1405 // Subtracting 0.0 preserves all inputs except for signalling NaNs, which 1568 // Subtracting 0.0 preserves all inputs except for signalling NaNs, which
1406 // become quiet NaNs. We use fsub rather than fadd because fsub preserves -0.0 1569 // become quiet NaNs. We use fsub rather than fadd because fsub preserves -0.0
1407 // inputs: -0.0 + 0.0 = 0.0, but -0.0 - 0.0 = -0.0. 1570 // inputs: -0.0 + 0.0 = 0.0, but -0.0 - 0.0 = -0.0.
1408 Fsub(dst, src, fp_zero); 1571 Fsub(dst, src, fp_zero);
1409 } 1572 }
1410 1573
1411 1574
1412 void MacroAssembler::LoadRoot(CPURegister destination, 1575 void MacroAssembler::LoadRoot(CPURegister destination,
(...skipping 631 matching lines...) Expand 10 before | Expand all | Expand 10 after
2044 } 2207 }
2045 2208
2046 AssertNotSmi(object); 2209 AssertNotSmi(object);
2047 2210
2048 UseScratchRegisterScope temps(this); 2211 UseScratchRegisterScope temps(this);
2049 Register temp = temps.AcquireX(); 2212 Register temp = temps.AcquireX();
2050 Ldr(temp, FieldMemOperand(object, HeapObject::kMapOffset)); 2213 Ldr(temp, FieldMemOperand(object, HeapObject::kMapOffset));
2051 JumpIfNotRoot(temp, Heap::kHeapNumberMapRootIndex, on_not_heap_number); 2214 JumpIfNotRoot(temp, Heap::kHeapNumberMapRootIndex, on_not_heap_number);
2052 } 2215 }
2053 2216
2054 2217 void MacroAssembler::TryRepresentDoubleAsInt(Register as_int, VRegister value,
2055 void MacroAssembler::TryRepresentDoubleAsInt(Register as_int, 2218 VRegister scratch_d,
2056 FPRegister value,
2057 FPRegister scratch_d,
2058 Label* on_successful_conversion, 2219 Label* on_successful_conversion,
2059 Label* on_failed_conversion) { 2220 Label* on_failed_conversion) {
2060 // Convert to an int and back again, then compare with the original value. 2221 // Convert to an int and back again, then compare with the original value.
2061 Fcvtzs(as_int, value); 2222 Fcvtzs(as_int, value);
2062 Scvtf(scratch_d, as_int); 2223 Scvtf(scratch_d, as_int);
2063 Fcmp(value, scratch_d); 2224 Fcmp(value, scratch_d);
2064 2225
2065 if (on_successful_conversion) { 2226 if (on_successful_conversion) {
2066 B(on_successful_conversion, eq); 2227 B(on_successful_conversion, eq);
2067 } 2228 }
(...skipping 593 matching lines...) Expand 10 before | Expand all | Expand 10 after
2661 // Drop the execution stack down to the frame pointer and restore 2822 // Drop the execution stack down to the frame pointer and restore
2662 // the caller frame pointer and return address. 2823 // the caller frame pointer and return address.
2663 Mov(jssp, fp); 2824 Mov(jssp, fp);
2664 AssertStackConsistency(); 2825 AssertStackConsistency();
2665 Pop(fp, lr); 2826 Pop(fp, lr);
2666 } 2827 }
2667 } 2828 }
2668 2829
2669 2830
2670 void MacroAssembler::ExitFramePreserveFPRegs() { 2831 void MacroAssembler::ExitFramePreserveFPRegs() {
2671 PushCPURegList(kCallerSavedFP); 2832 PushCPURegList(kCallerSavedV);
2672 } 2833 }
2673 2834
2674 2835
2675 void MacroAssembler::ExitFrameRestoreFPRegs() { 2836 void MacroAssembler::ExitFrameRestoreFPRegs() {
2676 // Read the registers from the stack without popping them. The stack pointer 2837 // Read the registers from the stack without popping them. The stack pointer
2677 // will be reset as part of the unwinding process. 2838 // will be reset as part of the unwinding process.
2678 CPURegList saved_fp_regs = kCallerSavedFP; 2839 CPURegList saved_fp_regs = kCallerSavedV;
2679 DCHECK(saved_fp_regs.Count() % 2 == 0); 2840 DCHECK(saved_fp_regs.Count() % 2 == 0);
2680 2841
2681 int offset = ExitFrameConstants::kLastExitFrameField; 2842 int offset = ExitFrameConstants::kLastExitFrameField;
2682 while (!saved_fp_regs.IsEmpty()) { 2843 while (!saved_fp_regs.IsEmpty()) {
2683 const CPURegister& dst0 = saved_fp_regs.PopHighestIndex(); 2844 const CPURegister& dst0 = saved_fp_regs.PopHighestIndex();
2684 const CPURegister& dst1 = saved_fp_regs.PopHighestIndex(); 2845 const CPURegister& dst1 = saved_fp_regs.PopHighestIndex();
2685 offset -= 2 * kDRegSize; 2846 offset -= 2 * kDRegSize;
2686 Ldp(dst1, dst0, MemOperand(fp, offset)); 2847 Ldp(dst1, dst0, MemOperand(fp, offset));
2687 } 2848 }
2688 } 2849 }
(...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after
3147 NO_ALLOCATION_FLAGS); 3308 NO_ALLOCATION_FLAGS);
3148 3309
3149 Heap::RootListIndex map_index = mode == MUTABLE 3310 Heap::RootListIndex map_index = mode == MUTABLE
3150 ? Heap::kMutableHeapNumberMapRootIndex 3311 ? Heap::kMutableHeapNumberMapRootIndex
3151 : Heap::kHeapNumberMapRootIndex; 3312 : Heap::kHeapNumberMapRootIndex;
3152 3313
3153 // Prepare the heap number map. 3314 // Prepare the heap number map.
3154 if (!heap_number_map.IsValid()) { 3315 if (!heap_number_map.IsValid()) {
3155 // If we have a valid value register, use the same type of register to store 3316 // If we have a valid value register, use the same type of register to store
3156 // the map so we can use STP to store both in one instruction. 3317 // the map so we can use STP to store both in one instruction.
3157 if (value.IsValid() && value.IsFPRegister()) { 3318 if (value.IsValid() && value.IsVRegister()) {
3158 heap_number_map = temps.AcquireD(); 3319 heap_number_map = temps.AcquireD();
3159 } else { 3320 } else {
3160 heap_number_map = scratch1; 3321 heap_number_map = scratch1;
3161 } 3322 }
3162 LoadRoot(heap_number_map, map_index); 3323 LoadRoot(heap_number_map, map_index);
3163 } 3324 }
3164 if (emit_debug_code()) { 3325 if (emit_debug_code()) {
3165 Register map; 3326 Register map;
3166 if (heap_number_map.IsFPRegister()) { 3327 if (heap_number_map.IsVRegister()) {
3167 map = scratch1; 3328 map = scratch1;
3168 Fmov(map, DoubleRegister(heap_number_map)); 3329 Fmov(map, DoubleRegister(heap_number_map));
3169 } else { 3330 } else {
3170 map = Register(heap_number_map); 3331 map = Register(heap_number_map);
3171 } 3332 }
3172 AssertRegisterIsRoot(map, map_index); 3333 AssertRegisterIsRoot(map, map_index);
3173 } 3334 }
3174 3335
3175 // Store the heap number map and the value in the allocated object. 3336 // Store the heap number map and the value in the allocated object.
3176 if (value.IsSameSizeAndType(heap_number_map)) { 3337 if (value.IsSameSizeAndType(heap_number_map)) {
(...skipping 441 matching lines...) Expand 10 before | Expand all | Expand 10 after
3618 const int num_unsaved = kNumSafepointRegisters - kNumSafepointSavedRegisters; 3779 const int num_unsaved = kNumSafepointRegisters - kNumSafepointSavedRegisters;
3619 DCHECK(num_unsaved >= 0); 3780 DCHECK(num_unsaved >= 0);
3620 Claim(num_unsaved); 3781 Claim(num_unsaved);
3621 PushXRegList(kSafepointSavedRegisters); 3782 PushXRegList(kSafepointSavedRegisters);
3622 } 3783 }
3623 3784
3624 3785
3625 void MacroAssembler::PushSafepointRegistersAndDoubles() { 3786 void MacroAssembler::PushSafepointRegistersAndDoubles() {
3626 PushSafepointRegisters(); 3787 PushSafepointRegisters();
3627 PushCPURegList(CPURegList( 3788 PushCPURegList(CPURegList(
3628 CPURegister::kFPRegister, kDRegSizeInBits, 3789 CPURegister::kVRegister, kDRegSizeInBits,
3629 RegisterConfiguration::Crankshaft()->allocatable_double_codes_mask())); 3790 RegisterConfiguration::Crankshaft()->allocatable_double_codes_mask()));
3630 } 3791 }
3631 3792
3632 3793
3633 void MacroAssembler::PopSafepointRegistersAndDoubles() { 3794 void MacroAssembler::PopSafepointRegistersAndDoubles() {
3634 PopCPURegList(CPURegList( 3795 PopCPURegList(CPURegList(
3635 CPURegister::kFPRegister, kDRegSizeInBits, 3796 CPURegister::kVRegister, kDRegSizeInBits,
3636 RegisterConfiguration::Crankshaft()->allocatable_double_codes_mask())); 3797 RegisterConfiguration::Crankshaft()->allocatable_double_codes_mask()));
3637 PopSafepointRegisters(); 3798 PopSafepointRegisters();
3638 } 3799 }
3639 3800
3640 void MacroAssembler::StoreToSafepointRegisterSlot(Register src, Register dst) { 3801 void MacroAssembler::StoreToSafepointRegisterSlot(Register src, Register dst) {
3641 Poke(src, SafepointRegisterStackIndex(dst.code()) * kPointerSize); 3802 Poke(src, SafepointRegisterStackIndex(dst.code()) * kPointerSize);
3642 } 3803 }
3643 3804
3644 void MacroAssembler::LoadFromSafepointRegisterSlot(Register dst, Register src) { 3805 void MacroAssembler::LoadFromSafepointRegisterSlot(Register dst, Register src) {
3645 Peek(src, SafepointRegisterStackIndex(dst.code()) * kPointerSize); 3806 Peek(src, SafepointRegisterStackIndex(dst.code()) * kPointerSize);
(...skipping 531 matching lines...) Expand 10 before | Expand all | Expand 10 after
4177 CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3}; 4338 CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
4178 CPURegister pcs[kPrintfMaxArgCount] = {NoReg, NoReg, NoReg, NoReg}; 4339 CPURegister pcs[kPrintfMaxArgCount] = {NoReg, NoReg, NoReg, NoReg};
4179 4340
4180 int arg_count = kPrintfMaxArgCount; 4341 int arg_count = kPrintfMaxArgCount;
4181 4342
4182 // The PCS varargs registers for printf. Note that x0 is used for the printf 4343 // The PCS varargs registers for printf. Note that x0 is used for the printf
4183 // format string. 4344 // format string.
4184 static const CPURegList kPCSVarargs = 4345 static const CPURegList kPCSVarargs =
4185 CPURegList(CPURegister::kRegister, kXRegSizeInBits, 1, arg_count); 4346 CPURegList(CPURegister::kRegister, kXRegSizeInBits, 1, arg_count);
4186 static const CPURegList kPCSVarargsFP = 4347 static const CPURegList kPCSVarargsFP =
4187 CPURegList(CPURegister::kFPRegister, kDRegSizeInBits, 0, arg_count - 1); 4348 CPURegList(CPURegister::kVRegister, kDRegSizeInBits, 0, arg_count - 1);
4188 4349
4189 // We can use caller-saved registers as scratch values, except for the 4350 // We can use caller-saved registers as scratch values, except for the
4190 // arguments and the PCS registers where they might need to go. 4351 // arguments and the PCS registers where they might need to go.
4191 CPURegList tmp_list = kCallerSaved; 4352 CPURegList tmp_list = kCallerSaved;
4192 tmp_list.Remove(x0); // Used to pass the format string. 4353 tmp_list.Remove(x0); // Used to pass the format string.
4193 tmp_list.Remove(kPCSVarargs); 4354 tmp_list.Remove(kPCSVarargs);
4194 tmp_list.Remove(arg0, arg1, arg2, arg3); 4355 tmp_list.Remove(arg0, arg1, arg2, arg3);
4195 4356
4196 CPURegList fp_tmp_list = kCallerSavedFP; 4357 CPURegList fp_tmp_list = kCallerSavedV;
4197 fp_tmp_list.Remove(kPCSVarargsFP); 4358 fp_tmp_list.Remove(kPCSVarargsFP);
4198 fp_tmp_list.Remove(arg0, arg1, arg2, arg3); 4359 fp_tmp_list.Remove(arg0, arg1, arg2, arg3);
4199 4360
4200 // Override the MacroAssembler's scratch register list. The lists will be 4361 // Override the MacroAssembler's scratch register list. The lists will be
4201 // reset automatically at the end of the UseScratchRegisterScope. 4362 // reset automatically at the end of the UseScratchRegisterScope.
4202 UseScratchRegisterScope temps(this); 4363 UseScratchRegisterScope temps(this);
4203 TmpList()->set_list(tmp_list.list()); 4364 TmpList()->set_list(tmp_list.list());
4204 FPTmpList()->set_list(fp_tmp_list.list()); 4365 FPTmpList()->set_list(fp_tmp_list.list());
4205 4366
4206 // Copies of the printf vararg registers that we can pop from. 4367 // Copies of the printf vararg registers that we can pop from.
4207 CPURegList pcs_varargs = kPCSVarargs; 4368 CPURegList pcs_varargs = kPCSVarargs;
4208 CPURegList pcs_varargs_fp = kPCSVarargsFP; 4369 CPURegList pcs_varargs_fp = kPCSVarargsFP;
4209 4370
4210 // Place the arguments. There are lots of clever tricks and optimizations we 4371 // Place the arguments. There are lots of clever tricks and optimizations we
4211 // could use here, but Printf is a debug tool so instead we just try to keep 4372 // could use here, but Printf is a debug tool so instead we just try to keep
4212 // it simple: Move each input that isn't already in the right place to a 4373 // it simple: Move each input that isn't already in the right place to a
4213 // scratch register, then move everything back. 4374 // scratch register, then move everything back.
4214 for (unsigned i = 0; i < kPrintfMaxArgCount; i++) { 4375 for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
4215 // Work out the proper PCS register for this argument. 4376 // Work out the proper PCS register for this argument.
4216 if (args[i].IsRegister()) { 4377 if (args[i].IsRegister()) {
4217 pcs[i] = pcs_varargs.PopLowestIndex().X(); 4378 pcs[i] = pcs_varargs.PopLowestIndex().X();
4218 // We might only need a W register here. We need to know the size of the 4379 // We might only need a W register here. We need to know the size of the
4219 // argument so we can properly encode it for the simulator call. 4380 // argument so we can properly encode it for the simulator call.
4220 if (args[i].Is32Bits()) pcs[i] = pcs[i].W(); 4381 if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
4221 } else if (args[i].IsFPRegister()) { 4382 } else if (args[i].IsVRegister()) {
4222 // In C, floats are always cast to doubles for varargs calls. 4383 // In C, floats are always cast to doubles for varargs calls.
4223 pcs[i] = pcs_varargs_fp.PopLowestIndex().D(); 4384 pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
4224 } else { 4385 } else {
4225 DCHECK(args[i].IsNone()); 4386 DCHECK(args[i].IsNone());
4226 arg_count = i; 4387 arg_count = i;
4227 break; 4388 break;
4228 } 4389 }
4229 4390
4230 // If the argument is already in the right place, leave it where it is. 4391 // If the argument is already in the right place, leave it where it is.
4231 if (args[i].Aliases(pcs[i])) continue; 4392 if (args[i].Aliases(pcs[i])) continue;
4232 4393
4233 // Otherwise, if the argument is in a PCS argument register, allocate an 4394 // Otherwise, if the argument is in a PCS argument register, allocate an
4234 // appropriate scratch register and then move it out of the way. 4395 // appropriate scratch register and then move it out of the way.
4235 if (kPCSVarargs.IncludesAliasOf(args[i]) || 4396 if (kPCSVarargs.IncludesAliasOf(args[i]) ||
4236 kPCSVarargsFP.IncludesAliasOf(args[i])) { 4397 kPCSVarargsFP.IncludesAliasOf(args[i])) {
4237 if (args[i].IsRegister()) { 4398 if (args[i].IsRegister()) {
4238 Register old_arg = Register(args[i]); 4399 Register old_arg = Register(args[i]);
4239 Register new_arg = temps.AcquireSameSizeAs(old_arg); 4400 Register new_arg = temps.AcquireSameSizeAs(old_arg);
4240 Mov(new_arg, old_arg); 4401 Mov(new_arg, old_arg);
4241 args[i] = new_arg; 4402 args[i] = new_arg;
4242 } else { 4403 } else {
4243 FPRegister old_arg = FPRegister(args[i]); 4404 VRegister old_arg = VRegister(args[i]);
4244 FPRegister new_arg = temps.AcquireSameSizeAs(old_arg); 4405 VRegister new_arg = temps.AcquireSameSizeAs(old_arg);
4245 Fmov(new_arg, old_arg); 4406 Fmov(new_arg, old_arg);
4246 args[i] = new_arg; 4407 args[i] = new_arg;
4247 } 4408 }
4248 } 4409 }
4249 } 4410 }
4250 4411
4251 // Do a second pass to move values into their final positions and perform any 4412 // Do a second pass to move values into their final positions and perform any
4252 // conversions that may be required. 4413 // conversions that may be required.
4253 for (int i = 0; i < arg_count; i++) { 4414 for (int i = 0; i < arg_count; i++) {
4254 DCHECK(pcs[i].type() == args[i].type()); 4415 DCHECK(pcs[i].type() == args[i].type());
4255 if (pcs[i].IsRegister()) { 4416 if (pcs[i].IsRegister()) {
4256 Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg); 4417 Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg);
4257 } else { 4418 } else {
4258 DCHECK(pcs[i].IsFPRegister()); 4419 DCHECK(pcs[i].IsVRegister());
4259 if (pcs[i].SizeInBytes() == args[i].SizeInBytes()) { 4420 if (pcs[i].SizeInBytes() == args[i].SizeInBytes()) {
4260 Fmov(FPRegister(pcs[i]), FPRegister(args[i])); 4421 Fmov(VRegister(pcs[i]), VRegister(args[i]));
4261 } else { 4422 } else {
4262 Fcvt(FPRegister(pcs[i]), FPRegister(args[i])); 4423 Fcvt(VRegister(pcs[i]), VRegister(args[i]));
4263 } 4424 }
4264 } 4425 }
4265 } 4426 }
4266 4427
4267 // Load the format string into x0, as per the procedure-call standard. 4428 // Load the format string into x0, as per the procedure-call standard.
4268 // 4429 //
4269 // To make the code as portable as possible, the format string is encoded 4430 // To make the code as portable as possible, the format string is encoded
4270 // directly in the instruction stream. It might be cleaner to encode it in a 4431 // directly in the instruction stream. It might be cleaner to encode it in a
4271 // literal pool, but since Printf is usually used for debugging, it is 4432 // literal pool, but since Printf is usually used for debugging, it is
4272 // beneficial for it to be minimally dependent on other features. 4433 // beneficial for it to be minimally dependent on other features.
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
4340 // available as scratch registers until we've preserved them. 4501 // available as scratch registers until we've preserved them.
4341 RegList old_tmp_list = TmpList()->list(); 4502 RegList old_tmp_list = TmpList()->list();
4342 RegList old_fp_tmp_list = FPTmpList()->list(); 4503 RegList old_fp_tmp_list = FPTmpList()->list();
4343 TmpList()->set_list(0); 4504 TmpList()->set_list(0);
4344 FPTmpList()->set_list(0); 4505 FPTmpList()->set_list(0);
4345 4506
4346 // Preserve all caller-saved registers as well as NZCV. 4507 // Preserve all caller-saved registers as well as NZCV.
4347 // If csp is the stack pointer, PushCPURegList asserts that the size of each 4508 // If csp is the stack pointer, PushCPURegList asserts that the size of each
4348 // list is a multiple of 16 bytes. 4509 // list is a multiple of 16 bytes.
4349 PushCPURegList(kCallerSaved); 4510 PushCPURegList(kCallerSaved);
4350 PushCPURegList(kCallerSavedFP); 4511 PushCPURegList(kCallerSavedV);
4351 4512
4352 // We can use caller-saved registers as scratch values (except for argN). 4513 // We can use caller-saved registers as scratch values (except for argN).
4353 CPURegList tmp_list = kCallerSaved; 4514 CPURegList tmp_list = kCallerSaved;
4354 CPURegList fp_tmp_list = kCallerSavedFP; 4515 CPURegList fp_tmp_list = kCallerSavedV;
4355 tmp_list.Remove(arg0, arg1, arg2, arg3); 4516 tmp_list.Remove(arg0, arg1, arg2, arg3);
4356 fp_tmp_list.Remove(arg0, arg1, arg2, arg3); 4517 fp_tmp_list.Remove(arg0, arg1, arg2, arg3);
4357 TmpList()->set_list(tmp_list.list()); 4518 TmpList()->set_list(tmp_list.list());
4358 FPTmpList()->set_list(fp_tmp_list.list()); 4519 FPTmpList()->set_list(fp_tmp_list.list());
4359 4520
4360 { UseScratchRegisterScope temps(this); 4521 { UseScratchRegisterScope temps(this);
4361 // If any of the arguments are the current stack pointer, allocate a new 4522 // If any of the arguments are the current stack pointer, allocate a new
4362 // register for them, and adjust the value to compensate for pushing the 4523 // register for them, and adjust the value to compensate for pushing the
4363 // caller-saved registers. 4524 // caller-saved registers.
4364 bool arg0_sp = StackPointer().Aliases(arg0); 4525 bool arg0_sp = StackPointer().Aliases(arg0);
4365 bool arg1_sp = StackPointer().Aliases(arg1); 4526 bool arg1_sp = StackPointer().Aliases(arg1);
4366 bool arg2_sp = StackPointer().Aliases(arg2); 4527 bool arg2_sp = StackPointer().Aliases(arg2);
4367 bool arg3_sp = StackPointer().Aliases(arg3); 4528 bool arg3_sp = StackPointer().Aliases(arg3);
4368 if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) { 4529 if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
4369 // Allocate a register to hold the original stack pointer value, to pass 4530 // Allocate a register to hold the original stack pointer value, to pass
4370 // to PrintfNoPreserve as an argument. 4531 // to PrintfNoPreserve as an argument.
4371 Register arg_sp = temps.AcquireX(); 4532 Register arg_sp = temps.AcquireX();
4372 Add(arg_sp, StackPointer(), 4533 Add(arg_sp, StackPointer(),
4373 kCallerSaved.TotalSizeInBytes() + kCallerSavedFP.TotalSizeInBytes()); 4534 kCallerSaved.TotalSizeInBytes() + kCallerSavedV.TotalSizeInBytes());
4374 if (arg0_sp) arg0 = Register::Create(arg_sp.code(), arg0.SizeInBits()); 4535 if (arg0_sp) arg0 = Register::Create(arg_sp.code(), arg0.SizeInBits());
4375 if (arg1_sp) arg1 = Register::Create(arg_sp.code(), arg1.SizeInBits()); 4536 if (arg1_sp) arg1 = Register::Create(arg_sp.code(), arg1.SizeInBits());
4376 if (arg2_sp) arg2 = Register::Create(arg_sp.code(), arg2.SizeInBits()); 4537 if (arg2_sp) arg2 = Register::Create(arg_sp.code(), arg2.SizeInBits());
4377 if (arg3_sp) arg3 = Register::Create(arg_sp.code(), arg3.SizeInBits()); 4538 if (arg3_sp) arg3 = Register::Create(arg_sp.code(), arg3.SizeInBits());
4378 } 4539 }
4379 4540
4380 // Preserve NZCV. 4541 // Preserve NZCV.
4381 { UseScratchRegisterScope temps(this); 4542 { UseScratchRegisterScope temps(this);
4382 Register tmp = temps.AcquireX(); 4543 Register tmp = temps.AcquireX();
4383 Mrs(tmp, NZCV); 4544 Mrs(tmp, NZCV);
4384 Push(tmp, xzr); 4545 Push(tmp, xzr);
4385 } 4546 }
4386 4547
4387 PrintfNoPreserve(format, arg0, arg1, arg2, arg3); 4548 PrintfNoPreserve(format, arg0, arg1, arg2, arg3);
4388 4549
4389 // Restore NZCV. 4550 // Restore NZCV.
4390 { UseScratchRegisterScope temps(this); 4551 { UseScratchRegisterScope temps(this);
4391 Register tmp = temps.AcquireX(); 4552 Register tmp = temps.AcquireX();
4392 Pop(xzr, tmp); 4553 Pop(xzr, tmp);
4393 Msr(NZCV, tmp); 4554 Msr(NZCV, tmp);
4394 } 4555 }
4395 } 4556 }
4396 4557
4397 PopCPURegList(kCallerSavedFP); 4558 PopCPURegList(kCallerSavedV);
4398 PopCPURegList(kCallerSaved); 4559 PopCPURegList(kCallerSaved);
4399 4560
4400 TmpList()->set_list(old_tmp_list); 4561 TmpList()->set_list(old_tmp_list);
4401 FPTmpList()->set_list(old_fp_tmp_list); 4562 FPTmpList()->set_list(old_fp_tmp_list);
4402 } 4563 }
4403 4564
4404 4565
4405 void MacroAssembler::EmitFrameSetupForCodeAgePatching() { 4566 void MacroAssembler::EmitFrameSetupForCodeAgePatching() {
4406 // TODO(jbramley): Other architectures use the internal memcpy to copy the 4567 // TODO(jbramley): Other architectures use the internal memcpy to copy the
4407 // sequence. If this is a performance bottleneck, we should consider caching 4568 // sequence. If this is a performance bottleneck, we should consider caching
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
4501 available_->set_list(old_available_); 4662 available_->set_list(old_available_);
4502 availablefp_->set_list(old_availablefp_); 4663 availablefp_->set_list(old_availablefp_);
4503 } 4664 }
4504 4665
4505 4666
4506 Register UseScratchRegisterScope::AcquireSameSizeAs(const Register& reg) { 4667 Register UseScratchRegisterScope::AcquireSameSizeAs(const Register& reg) {
4507 int code = AcquireNextAvailable(available_).code(); 4668 int code = AcquireNextAvailable(available_).code();
4508 return Register::Create(code, reg.SizeInBits()); 4669 return Register::Create(code, reg.SizeInBits());
4509 } 4670 }
4510 4671
4511 4672 VRegister UseScratchRegisterScope::AcquireSameSizeAs(const VRegister& reg) {
4512 FPRegister UseScratchRegisterScope::AcquireSameSizeAs(const FPRegister& reg) {
4513 int code = AcquireNextAvailable(availablefp_).code(); 4673 int code = AcquireNextAvailable(availablefp_).code();
4514 return FPRegister::Create(code, reg.SizeInBits()); 4674 return VRegister::Create(code, reg.SizeInBits());
4515 } 4675 }
4516 4676
4517 4677
4518 CPURegister UseScratchRegisterScope::AcquireNextAvailable( 4678 CPURegister UseScratchRegisterScope::AcquireNextAvailable(
4519 CPURegList* available) { 4679 CPURegList* available) {
4520 CHECK(!available->IsEmpty()); 4680 CHECK(!available->IsEmpty());
4521 CPURegister result = available->PopLowestIndex(); 4681 CPURegister result = available->PopLowestIndex();
4522 DCHECK(!AreAliased(result, xzr, csp)); 4682 DCHECK(!AreAliased(result, xzr, csp));
4523 return result; 4683 return result;
4524 } 4684 }
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
4585 } 4745 }
4586 4746
4587 4747
4588 #undef __ 4748 #undef __
4589 4749
4590 4750
4591 } // namespace internal 4751 } // namespace internal
4592 } // namespace v8 4752 } // namespace v8
4593 4753
4594 #endif // V8_TARGET_ARCH_ARM64 4754 #endif // V8_TARGET_ARCH_ARM64
OLDNEW
« no previous file with comments | « src/arm64/macro-assembler-arm64.h ('k') | src/arm64/macro-assembler-arm64-inl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698