Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(18)

Unified Diff: src/arm64/assembler-arm64.cc

Issue 2812573003: Reland "ARM64: Add NEON support" (Closed)
Patch Set: Add trace directory to gitignore Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm64/assembler-arm64.h ('k') | src/arm64/assembler-arm64-inl.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm64/assembler-arm64.cc
diff --git a/src/arm64/assembler-arm64.cc b/src/arm64/assembler-arm64.cc
index ac6931dec76d3e95aa3f8ab86eb64f6faef9a857..a8cb93a5c73bed7013bdfbd39417e6469ea458d4 100644
--- a/src/arm64/assembler-arm64.cc
+++ b/src/arm64/assembler-arm64.cc
@@ -89,8 +89,8 @@ CPURegister CPURegList::PopHighestIndex() {
void CPURegList::RemoveCalleeSaved() {
if (type() == CPURegister::kRegister) {
Remove(GetCalleeSaved(RegisterSizeInBits()));
- } else if (type() == CPURegister::kFPRegister) {
- Remove(GetCalleeSavedFP(RegisterSizeInBits()));
+ } else if (type() == CPURegister::kVRegister) {
+ Remove(GetCalleeSavedV(RegisterSizeInBits()));
} else {
DCHECK(type() == CPURegister::kNoRegister);
DCHECK(IsEmpty());
@@ -103,9 +103,8 @@ CPURegList CPURegList::GetCalleeSaved(int size) {
return CPURegList(CPURegister::kRegister, size, 19, 29);
}
-
-CPURegList CPURegList::GetCalleeSavedFP(int size) {
- return CPURegList(CPURegister::kFPRegister, size, 8, 15);
+CPURegList CPURegList::GetCalleeSavedV(int size) {
+ return CPURegList(CPURegister::kVRegister, size, 8, 15);
}
@@ -116,11 +115,10 @@ CPURegList CPURegList::GetCallerSaved(int size) {
return list;
}
-
-CPURegList CPURegList::GetCallerSavedFP(int size) {
+CPURegList CPURegList::GetCallerSavedV(int size) {
// Registers d0-d7 and d16-d31 are caller-saved.
- CPURegList list = CPURegList(CPURegister::kFPRegister, size, 0, 7);
- list.Combine(CPURegList(CPURegister::kFPRegister, size, 16, 31));
+ CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
+ list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
return list;
}
@@ -240,7 +238,7 @@ bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
if (regs[i].IsRegister()) {
number_of_valid_regs++;
unique_regs |= regs[i].Bit();
- } else if (regs[i].IsFPRegister()) {
+ } else if (regs[i].IsVRegister()) {
number_of_valid_fpregs++;
unique_fpregs |= regs[i].Bit();
} else {
@@ -277,6 +275,39 @@ bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2,
return match;
}
+bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
+ const VRegister& reg3, const VRegister& reg4) {
+ DCHECK(reg1.IsValid());
+ return (!reg2.IsValid() || reg2.IsSameFormat(reg1)) &&
+ (!reg3.IsValid() || reg3.IsSameFormat(reg1)) &&
+ (!reg4.IsValid() || reg4.IsSameFormat(reg1));
+}
+
+bool AreConsecutive(const VRegister& reg1, const VRegister& reg2,
+ const VRegister& reg3, const VRegister& reg4) {
+ DCHECK(reg1.IsValid());
+ if (!reg2.IsValid()) {
+ DCHECK(!reg3.IsValid() && !reg4.IsValid());
+ return true;
+ } else if (reg2.code() != ((reg1.code() + 1) % kNumberOfVRegisters)) {
+ return false;
+ }
+
+ if (!reg3.IsValid()) {
+ DCHECK(!reg4.IsValid());
+ return true;
+ } else if (reg3.code() != ((reg2.code() + 1) % kNumberOfVRegisters)) {
+ return false;
+ }
+
+ if (!reg4.IsValid()) {
+ return true;
+ } else if (reg4.code() != ((reg3.code() + 1) % kNumberOfVRegisters)) {
+ return false;
+ }
+
+ return true;
+}
void Immediate::InitializeHandle(Handle<Object> handle) {
AllowDeferredHandleDereference using_raw_address;
@@ -1773,446 +1804,2163 @@ void Assembler::stlxrh(const Register& rs, const Register& rt,
Emit(STLXR_h | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
}
-void Assembler::mov(const Register& rd, const Register& rm) {
- // Moves involving the stack pointer are encoded as add immediate with
- // second operand of zero. Otherwise, orr with first operand zr is
- // used.
- if (rd.IsSP() || rm.IsSP()) {
- add(rd, rm, 0);
+void Assembler::NEON3DifferentL(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, NEON3DifferentOp vop) {
+ DCHECK(AreSameFormat(vn, vm));
+ DCHECK((vn.Is1H() && vd.Is1S()) || (vn.Is1S() && vd.Is1D()) ||
+ (vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
+ (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
+ (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vn);
} else {
- orr(rd, AppropriateZeroRegFor(rd), rm);
+ format = VFormat(vn);
}
+ Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEON3DifferentW(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, NEON3DifferentOp vop) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK((vm.Is8B() && vd.Is8H()) || (vm.Is4H() && vd.Is4S()) ||
+ (vm.Is2S() && vd.Is2D()) || (vm.Is16B() && vd.Is8H()) ||
+ (vm.Is8H() && vd.Is4S()) || (vm.Is4S() && vd.Is2D()));
+ Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, NEON3DifferentOp vop) {
+ DCHECK(AreSameFormat(vm, vn));
+ DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
+ (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
+ Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+#define NEON_3DIFF_LONG_LIST(V) \
+ V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \
+ V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \
+ V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \
+ V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \
+ V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \
+ V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \
+ V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \
+ V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \
+ V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \
+ V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \
+ V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD()) \
+ V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \
+ V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \
+ V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \
+ V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \
+ V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \
+ V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD()) \
+ V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \
+ V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \
+ V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \
+ V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \
+ V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \
+ V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \
+ V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \
+ V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \
+ V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \
+ V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \
+ V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \
+ V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \
+ V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+ V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+ V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S())
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn, \
+ const VRegister& vm) { \
+ DCHECK(AS); \
+ NEON3DifferentL(vd, vn, vm, OP); \
+ }
+NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+#define NEON_3DIFF_HN_LIST(V) \
+ V(addhn, NEON_ADDHN, vd.IsD()) \
+ V(addhn2, NEON_ADDHN2, vd.IsQ()) \
+ V(raddhn, NEON_RADDHN, vd.IsD()) \
+ V(raddhn2, NEON_RADDHN2, vd.IsQ()) \
+ V(subhn, NEON_SUBHN, vd.IsD()) \
+ V(subhn2, NEON_SUBHN2, vd.IsQ()) \
+ V(rsubhn, NEON_RSUBHN, vd.IsD()) \
+ V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn, \
+ const VRegister& vm) { \
+ DCHECK(AS); \
+ NEON3DifferentHN(vd, vn, vm, OP); \
+ }
+NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::NEONPerm(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, NEONPermOp op) {
+ DCHECK(AreSameFormat(vd, vn, vm));
+ DCHECK(!vd.Is1D());
+ Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
}
+void Assembler::trn1(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_TRN1);
+}
-void Assembler::mvn(const Register& rd, const Operand& operand) {
- orn(rd, AppropriateZeroRegFor(rd), operand);
+void Assembler::trn2(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_TRN2);
}
+void Assembler::uzp1(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_UZP1);
+}
-void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
- DCHECK(rt.Is64Bits());
- Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
+void Assembler::uzp2(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_UZP2);
}
+void Assembler::zip1(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_ZIP1);
+}
-void Assembler::msr(SystemRegister sysreg, const Register& rt) {
- DCHECK(rt.Is64Bits());
- Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
+void Assembler::zip2(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_ZIP2);
}
+void Assembler::NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
+ NEONShiftImmediateOp op, int immh_immb) {
+ DCHECK(AreSameFormat(vd, vn));
+ Instr q, scalar;
+ if (vn.IsScalar()) {
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ q = vd.IsD() ? 0 : NEON_Q;
+ scalar = 0;
+ }
+ Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
+ int shift, NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ DCHECK((shift >= 0) && (shift < laneSizeInBits));
+ NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
+}
+
+void Assembler::NEONShiftRightImmediate(const VRegister& vd,
+ const VRegister& vn, int shift,
+ NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ DCHECK((shift >= 1) && (shift <= laneSizeInBits));
+ NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
+}
+
+void Assembler::NEONShiftImmediateL(const VRegister& vd, const VRegister& vn,
+ int shift, NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ DCHECK((shift >= 0) && (shift < laneSizeInBits));
+ int immh_immb = (laneSizeInBits + shift) << 16;
+
+ DCHECK((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
+ (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
+ (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
+ Instr q;
+ q = vn.IsD() ? 0 : NEON_Q;
+ Emit(q | op | immh_immb | Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEONShiftImmediateN(const VRegister& vd, const VRegister& vn,
+ int shift, NEONShiftImmediateOp op) {
+ Instr q, scalar;
+ int laneSizeInBits = vd.LaneSizeInBits();
+ DCHECK((shift >= 1) && (shift <= laneSizeInBits));
+ int immh_immb = (2 * laneSizeInBits - shift) << 16;
+
+ if (vn.IsScalar()) {
+ DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
+ (vd.Is1S() && vn.Is1D()));
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
+ (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
+ scalar = 0;
+ q = vd.IsD() ? 0 : NEON_Q;
+ }
+ Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
+}
-void Assembler::hint(SystemHint code) {
- Emit(HINT | ImmHint(code) | Rt(xzr));
+void Assembler::shl(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL);
}
+void Assembler::sli(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI);
+}
-void Assembler::dmb(BarrierDomain domain, BarrierType type) {
- Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+void Assembler::sqshl(const VRegister& vd, const VRegister& vn, int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm);
}
+void Assembler::sqshlu(const VRegister& vd, const VRegister& vn, int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU);
+}
-void Assembler::dsb(BarrierDomain domain, BarrierType type) {
- Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+void Assembler::uqshl(const VRegister& vd, const VRegister& vn, int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm);
}
+void Assembler::sshll(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsD());
+ NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
+}
-void Assembler::isb() {
- Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
+void Assembler::sshll2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsQ());
+ NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
}
+void Assembler::sxtl(const VRegister& vd, const VRegister& vn) {
+ sshll(vd, vn, 0);
+}
-void Assembler::fmov(FPRegister fd, double imm) {
- DCHECK(fd.Is64Bits());
- DCHECK(IsImmFP64(imm));
- Emit(FMOV_d_imm | Rd(fd) | ImmFP64(imm));
+void Assembler::sxtl2(const VRegister& vd, const VRegister& vn) {
+ sshll2(vd, vn, 0);
}
+void Assembler::ushll(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsD());
+ NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
+}
-void Assembler::fmov(FPRegister fd, float imm) {
- DCHECK(fd.Is32Bits());
- DCHECK(IsImmFP32(imm));
- Emit(FMOV_s_imm | Rd(fd) | ImmFP32(imm));
+void Assembler::ushll2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsQ());
+ NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
}
+void Assembler::uxtl(const VRegister& vd, const VRegister& vn) {
+ ushll(vd, vn, 0);
+}
-void Assembler::fmov(Register rd, FPRegister fn) {
- DCHECK(rd.SizeInBits() == fn.SizeInBits());
- FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
- Emit(op | Rd(rd) | Rn(fn));
+void Assembler::uxtl2(const VRegister& vd, const VRegister& vn) {
+ ushll2(vd, vn, 0);
}
+void Assembler::sri(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRI);
+}
-void Assembler::fmov(FPRegister fd, Register rn) {
- DCHECK(fd.SizeInBits() == rn.SizeInBits());
- FPIntegerConvertOp op = fd.Is32Bits() ? FMOV_sw : FMOV_dx;
- Emit(op | Rd(fd) | Rn(rn));
+void Assembler::sshr(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR);
}
+void Assembler::ushr(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_USHR);
+}
-void Assembler::fmov(FPRegister fd, FPRegister fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- Emit(FPType(fd) | FMOV | Rd(fd) | Rn(fn));
+void Assembler::srshr(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR);
}
+void Assembler::urshr(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR);
+}
-void Assembler::fadd(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FADD);
+void Assembler::ssra(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA);
}
+void Assembler::usra(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_USRA);
+}
-void Assembler::fsub(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FSUB);
+void Assembler::srsra(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA);
}
+void Assembler::ursra(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA);
+}
-void Assembler::fmul(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FMUL);
+void Assembler::shrn(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsD());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
}
+void Assembler::shrn2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
+}
-void Assembler::fmadd(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm,
- const FPRegister& fa) {
- FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMADD_s : FMADD_d);
+void Assembler::rshrn(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsD());
+ NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
}
+void Assembler::rshrn2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
+}
-void Assembler::fmsub(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm,
- const FPRegister& fa) {
- FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMSUB_s : FMSUB_d);
+void Assembler::sqshrn(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
}
+void Assembler::sqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
+}
-void Assembler::fnmadd(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm,
- const FPRegister& fa) {
- FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMADD_s : FNMADD_d);
+void Assembler::sqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
}
+void Assembler::sqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
+}
-void Assembler::fnmsub(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm,
- const FPRegister& fa) {
- FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMSUB_s : FNMSUB_d);
+void Assembler::sqshrun(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
}
+void Assembler::sqshrun2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
+}
-void Assembler::fdiv(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FDIV);
+void Assembler::sqrshrun(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
}
+void Assembler::sqrshrun2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
+}
-void Assembler::fmax(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FMAX);
+void Assembler::uqshrn(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
}
+void Assembler::uqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
+}
-void Assembler::fmaxnm(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FMAXNM);
+void Assembler::uqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
}
+void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
+}
-void Assembler::fmin(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FMIN);
+void Assembler::uaddw(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_UADDW);
}
+void Assembler::uaddw2(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_UADDW2);
+}
-void Assembler::fminnm(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm) {
- FPDataProcessing2Source(fd, fn, fm, FMINNM);
+void Assembler::saddw(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_SADDW);
}
+void Assembler::saddw2(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_SADDW2);
+}
-void Assembler::fabs(const FPRegister& fd,
- const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FABS);
+void Assembler::usubw(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_USUBW);
}
+void Assembler::usubw2(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_USUBW2);
+}
-void Assembler::fneg(const FPRegister& fd,
- const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FNEG);
+void Assembler::ssubw(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_SSUBW);
}
+void Assembler::ssubw2(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_SSUBW2);
+}
-void Assembler::fsqrt(const FPRegister& fd,
- const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FSQRT);
+void Assembler::mov(const Register& rd, const Register& rm) {
+ // Moves involving the stack pointer are encoded as add immediate with
+ // second operand of zero. Otherwise, orr with first operand zr is
+ // used.
+ if (rd.IsSP() || rm.IsSP()) {
+ add(rd, rm, 0);
+ } else {
+ orr(rd, AppropriateZeroRegFor(rd), rm);
+ }
}
+void Assembler::ins(const VRegister& vd, int vd_index, const Register& rn) {
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vd.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1:
+ format = NEON_16B;
+ DCHECK(rn.IsW());
+ break;
+ case 2:
+ format = NEON_8H;
+ DCHECK(rn.IsW());
+ break;
+ case 4:
+ format = NEON_4S;
+ DCHECK(rn.IsW());
+ break;
+ default:
+ DCHECK_EQ(lane_size, 8);
+ DCHECK(rn.IsX());
+ format = NEON_2D;
+ break;
+ }
-void Assembler::frinta(const FPRegister& fd,
- const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FRINTA);
+ DCHECK((0 <= vd_index) &&
+ (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd));
}
+void Assembler::mov(const Register& rd, const VRegister& vn, int vn_index) {
+ DCHECK_GE(vn.SizeInBytes(), 4);
+ umov(rd, vn, vn_index);
+}
-void Assembler::frintm(const FPRegister& fd,
- const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FRINTM);
+void Assembler::smov(const Register& rd, const VRegister& vn, int vn_index) {
+ // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
+ // number of lanes, and T is b, h, s.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ Instr q = 0;
+ switch (lane_size) {
+ case 1:
+ format = NEON_16B;
+ break;
+ case 2:
+ format = NEON_8H;
+ break;
+ default:
+ DCHECK_EQ(lane_size, 4);
+ DCHECK(rd.IsX());
+ format = NEON_4S;
+ break;
+ }
+ q = rd.IsW() ? 0 : NEON_Q;
+ DCHECK((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
}
+void Assembler::cls(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd));
+}
-void Assembler::frintn(const FPRegister& fd,
- const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FRINTN);
+void Assembler::clz(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd));
}
+void Assembler::cnt(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd));
+}
-void Assembler::frintp(const FPRegister& fd, const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FRINTP);
+void Assembler::rev16(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd));
}
+void Assembler::rev32(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H());
+ Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd));
+}
-void Assembler::frintz(const FPRegister& fd,
- const FPRegister& fn) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- FPDataProcessing1Source(fd, fn, FRINTZ);
+void Assembler::rev64(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd));
}
+void Assembler::ursqrte(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is2S() || vd.Is4S());
+ Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd));
+}
-void Assembler::fcmp(const FPRegister& fn,
- const FPRegister& fm) {
- DCHECK(fn.SizeInBits() == fm.SizeInBits());
- Emit(FPType(fn) | FCMP | Rm(fm) | Rn(fn));
+void Assembler::urecpe(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is2S() || vd.Is4S());
+ Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd));
}
+void Assembler::NEONAddlp(const VRegister& vd, const VRegister& vn,
+ NEON2RegMiscOp op) {
+ DCHECK((op == NEON_SADDLP) || (op == NEON_UADDLP) || (op == NEON_SADALP) ||
+ (op == NEON_UADALP));
-void Assembler::fcmp(const FPRegister& fn,
- double value) {
- USE(value);
- // Although the fcmp instruction can strictly only take an immediate value of
- // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
- // affect the result of the comparison.
- DCHECK(value == 0.0);
- Emit(FPType(fn) | FCMP_zero | Rn(fn));
+ DCHECK((vn.Is8B() && vd.Is4H()) || (vn.Is4H() && vd.Is2S()) ||
+ (vn.Is2S() && vd.Is1D()) || (vn.Is16B() && vd.Is8H()) ||
+ (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
}
+void Assembler::saddlp(const VRegister& vd, const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_SADDLP);
+}
-void Assembler::fccmp(const FPRegister& fn,
- const FPRegister& fm,
- StatusFlags nzcv,
- Condition cond) {
- DCHECK(fn.SizeInBits() == fm.SizeInBits());
- Emit(FPType(fn) | FCCMP | Rm(fm) | Cond(cond) | Rn(fn) | Nzcv(nzcv));
+void Assembler::uaddlp(const VRegister& vd, const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_UADDLP);
}
+void Assembler::sadalp(const VRegister& vd, const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_SADALP);
+}
-void Assembler::fcsel(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm,
- Condition cond) {
- DCHECK(fd.SizeInBits() == fn.SizeInBits());
- DCHECK(fd.SizeInBits() == fm.SizeInBits());
- Emit(FPType(fd) | FCSEL | Rm(fm) | Cond(cond) | Rn(fn) | Rd(fd));
+void Assembler::uadalp(const VRegister& vd, const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_UADALP);
}
+void Assembler::NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
+ NEONAcrossLanesOp op) {
+ DCHECK((vn.Is8B() && vd.Is1H()) || (vn.Is16B() && vd.Is1H()) ||
+ (vn.Is4H() && vd.Is1S()) || (vn.Is8H() && vd.Is1S()) ||
+ (vn.Is4S() && vd.Is1D()));
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
+}
-void Assembler::FPConvertToInt(const Register& rd,
- const FPRegister& fn,
- FPIntegerConvertOp op) {
- Emit(SF(rd) | FPType(fn) | op | Rn(fn) | Rd(rd));
+void Assembler::saddlv(const VRegister& vd, const VRegister& vn) {
+ NEONAcrossLanesL(vd, vn, NEON_SADDLV);
}
+void Assembler::uaddlv(const VRegister& vd, const VRegister& vn) {
+ NEONAcrossLanesL(vd, vn, NEON_UADDLV);
+}
-void Assembler::fcvt(const FPRegister& fd,
- const FPRegister& fn) {
- if (fd.Is64Bits()) {
- // Convert float to double.
- DCHECK(fn.Is32Bits());
- FPDataProcessing1Source(fd, fn, FCVT_ds);
+void Assembler::NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
+ NEONAcrossLanesOp op) {
+ DCHECK((vn.Is8B() && vd.Is1B()) || (vn.Is16B() && vd.Is1B()) ||
+ (vn.Is4H() && vd.Is1H()) || (vn.Is8H() && vd.Is1H()) ||
+ (vn.Is4S() && vd.Is1S()));
+ if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
} else {
- // Convert double to float.
- DCHECK(fn.Is64Bits());
- FPDataProcessing1Source(fd, fn, FCVT_sd);
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
}
}
+#define NEON_ACROSSLANES_LIST(V) \
+ V(fmaxv, NEON_FMAXV, vd.Is1S()) \
+ V(fminv, NEON_FMINV, vd.Is1S()) \
+ V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
+ V(fminnmv, NEON_FMINNMV, vd.Is1S()) \
+ V(addv, NEON_ADDV, true) \
+ V(smaxv, NEON_SMAXV, true) \
+ V(sminv, NEON_SMINV, true) \
+ V(umaxv, NEON_UMAXV, true) \
+ V(uminv, NEON_UMINV, true)
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
+ DCHECK(AS); \
+ NEONAcrossLanes(vd, vn, OP); \
+ }
+NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::mov(const VRegister& vd, int vd_index, const Register& rn) {
+ ins(vd, vd_index, rn);
+}
+
+void Assembler::umov(const Register& rd, const VRegister& vn, int vn_index) {
+ // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ Instr q = 0;
+ switch (lane_size) {
+ case 1:
+ format = NEON_16B;
+ DCHECK(rd.IsW());
+ break;
+ case 2:
+ format = NEON_8H;
+ DCHECK(rd.IsW());
+ break;
+ case 4:
+ format = NEON_4S;
+ DCHECK(rd.IsW());
+ break;
+ default:
+ DCHECK_EQ(lane_size, 8);
+ DCHECK(rd.IsX());
+ format = NEON_2D;
+ q = NEON_Q;
+ break;
+ }
-void Assembler::fcvtau(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTAU);
+ DCHECK((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
}
+void Assembler::mov(const VRegister& vd, const VRegister& vn, int vn_index) {
+ DCHECK(vd.IsScalar());
+ dup(vd, vn, vn_index);
+}
-void Assembler::fcvtas(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTAS);
+void Assembler::dup(const VRegister& vd, const Register& rn) {
+ DCHECK(!vd.Is1D());
+ DCHECK_EQ(vd.Is2D(), rn.IsX());
+ Instr q = vd.IsD() ? 0 : NEON_Q;
+ Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd));
}
+void Assembler::ins(const VRegister& vd, int vd_index, const VRegister& vn,
+ int vn_index) {
+ DCHECK(AreSameFormat(vd, vn));
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vd.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1:
+ format = NEON_16B;
+ break;
+ case 2:
+ format = NEON_8H;
+ break;
+ case 4:
+ format = NEON_4S;
+ break;
+ default:
+ DCHECK_EQ(lane_size, 8);
+ format = NEON_2D;
+ break;
+ }
-void Assembler::fcvtmu(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTMU);
+ DCHECK((0 <= vd_index) &&
+ (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ DCHECK((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) |
+ ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd));
}
+void Assembler::NEONTable(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, NEONTableOp op) {
+ DCHECK(vd.Is16B() || vd.Is8B());
+ DCHECK(vn.Is16B());
+ DCHECK(AreSameFormat(vd, vm));
+ Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd));
+}
-void Assembler::fcvtms(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTMS);
+void Assembler::tbl(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONTable(vd, vn, vm, NEON_TBL_1v);
}
+void Assembler::tbl(const VRegister& vd, const VRegister& vn,
+ const VRegister& vn2, const VRegister& vm) {
+ USE(vn2);
+ DCHECK(AreSameFormat(vn, vn2));
+ DCHECK(AreConsecutive(vn, vn2));
+ NEONTable(vd, vn, vm, NEON_TBL_2v);
+}
-void Assembler::fcvtnu(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTNU);
+void Assembler::tbl(const VRegister& vd, const VRegister& vn,
+ const VRegister& vn2, const VRegister& vn3,
+ const VRegister& vm) {
+ USE(vn2);
+ USE(vn3);
+ DCHECK(AreSameFormat(vn, vn2, vn3));
+ DCHECK(AreConsecutive(vn, vn2, vn3));
+ NEONTable(vd, vn, vm, NEON_TBL_3v);
}
+void Assembler::tbl(const VRegister& vd, const VRegister& vn,
+ const VRegister& vn2, const VRegister& vn3,
+ const VRegister& vn4, const VRegister& vm) {
+ USE(vn2);
+ USE(vn3);
+ USE(vn4);
+ DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
+ DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
+ NEONTable(vd, vn, vm, NEON_TBL_4v);
+}
-void Assembler::fcvtns(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTNS);
+void Assembler::tbx(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ NEONTable(vd, vn, vm, NEON_TBX_1v);
}
+void Assembler::tbx(const VRegister& vd, const VRegister& vn,
+ const VRegister& vn2, const VRegister& vm) {
+ USE(vn2);
+ DCHECK(AreSameFormat(vn, vn2));
+ DCHECK(AreConsecutive(vn, vn2));
+ NEONTable(vd, vn, vm, NEON_TBX_2v);
+}
-void Assembler::fcvtzu(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTZU);
+void Assembler::tbx(const VRegister& vd, const VRegister& vn,
+ const VRegister& vn2, const VRegister& vn3,
+ const VRegister& vm) {
+ USE(vn2);
+ USE(vn3);
+ DCHECK(AreSameFormat(vn, vn2, vn3));
+ DCHECK(AreConsecutive(vn, vn2, vn3));
+ NEONTable(vd, vn, vm, NEON_TBX_3v);
}
+void Assembler::tbx(const VRegister& vd, const VRegister& vn,
+ const VRegister& vn2, const VRegister& vn3,
+ const VRegister& vn4, const VRegister& vm) {
+ USE(vn2);
+ USE(vn3);
+ USE(vn4);
+ DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
+ DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
+ NEONTable(vd, vn, vm, NEON_TBX_4v);
+}
-void Assembler::fcvtzs(const Register& rd, const FPRegister& fn) {
- FPConvertToInt(rd, fn, FCVTZS);
+void Assembler::mov(const VRegister& vd, int vd_index, const VRegister& vn,
+ int vn_index) {
+ ins(vd, vd_index, vn, vn_index);
}
+void Assembler::mvn(const Register& rd, const Operand& operand) {
+ orn(rd, AppropriateZeroRegFor(rd), operand);
+}
-void Assembler::scvtf(const FPRegister& fd,
- const Register& rn,
- unsigned fbits) {
- if (fbits == 0) {
- Emit(SF(rn) | FPType(fd) | SCVTF | Rn(rn) | Rd(fd));
- } else {
- Emit(SF(rn) | FPType(fd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
- Rd(fd));
- }
+void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
+ DCHECK(rt.Is64Bits());
+ Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
+}
+
+void Assembler::msr(SystemRegister sysreg, const Register& rt) {
+ DCHECK(rt.Is64Bits());
+ Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
}
+void Assembler::hint(SystemHint code) { Emit(HINT | ImmHint(code) | Rt(xzr)); }
-void Assembler::ucvtf(const FPRegister& fd,
- const Register& rn,
- unsigned fbits) {
- if (fbits == 0) {
- Emit(SF(rn) | FPType(fd) | UCVTF | Rn(rn) | Rd(fd));
+// NEON structure loads and stores.
+Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
+ Instr addr_field = RnSP(addr.base());
+
+ if (addr.IsPostIndex()) {
+ static_assert(NEONLoadStoreMultiStructPostIndex ==
+ static_cast<NEONLoadStoreMultiStructPostIndexOp>(
+ NEONLoadStoreSingleStructPostIndex),
+ "Opcodes must match for NEON post index memop.");
+
+ addr_field |= NEONLoadStoreMultiStructPostIndex;
+ if (addr.offset() == 0) {
+ addr_field |= RmNot31(addr.regoffset());
+ } else {
+ // The immediate post index addressing mode is indicated by rm = 31.
+ // The immediate is implied by the number of vector registers used.
+ addr_field |= (0x1f << Rm_offset);
+ }
} else {
- Emit(SF(rn) | FPType(fd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
- Rd(fd));
+ DCHECK(addr.IsImmediateOffset() && (addr.offset() == 0));
}
+ return addr_field;
}
-
-void Assembler::dcptr(Label* label) {
- RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
- if (label->is_bound()) {
- // The label is bound, so it does not need to be updated and the internal
- // reference should be emitted.
- //
- // In this case, label->pos() returns the offset of the label from the
- // start of the buffer.
- internal_reference_positions_.push_back(pc_offset());
- dc64(reinterpret_cast<uintptr_t>(buffer_ + label->pos()));
+void Assembler::LoadStoreStructVerify(const VRegister& vt,
+ const MemOperand& addr, Instr op) {
+#ifdef DEBUG
+ // Assert that addressing mode is either offset (with immediate 0), post
+ // index by immediate of the size of the register list, or post index by a
+ // value in a core register.
+ if (addr.IsImmediateOffset()) {
+ DCHECK_EQ(addr.offset(), 0);
} else {
- int32_t offset;
- if (label->is_linked()) {
- // The label is linked, so the internal reference should be added
- // onto the end of the label's link chain.
- //
- // In this case, label->pos() returns the offset of the last linked
- // instruction from the start of the buffer.
- offset = label->pos() - pc_offset();
- DCHECK(offset != kStartOfLabelLinkChain);
- } else {
- // The label is unused, so it now becomes linked and the internal
- // reference is at the start of the new link chain.
- offset = kStartOfLabelLinkChain;
- }
- // The instruction at pc is now the last link in the label's chain.
- label->link_to(pc_offset());
+ int offset = vt.SizeInBytes();
+ switch (op) {
+ case NEON_LD1_1v:
+ case NEON_ST1_1v:
+ offset *= 1;
+ break;
+ case NEONLoadStoreSingleStructLoad1:
+ case NEONLoadStoreSingleStructStore1:
+ case NEON_LD1R:
+ offset = (offset / vt.LaneCount()) * 1;
+ break;
- // Traditionally the offset to the previous instruction in the chain is
- // encoded in the instruction payload (e.g. branch range) but internal
- // references are not instructions so while unbound they are encoded as
- // two consecutive brk instructions. The two 16-bit immediates are used
- // to encode the offset.
- offset >>= kInstructionSizeLog2;
- DCHECK(is_int32(offset));
- uint32_t high16 = unsigned_bitextract_32(31, 16, offset);
- uint32_t low16 = unsigned_bitextract_32(15, 0, offset);
+ case NEON_LD1_2v:
+ case NEON_ST1_2v:
+ case NEON_LD2:
+ case NEON_ST2:
+ offset *= 2;
+ break;
+ case NEONLoadStoreSingleStructLoad2:
+ case NEONLoadStoreSingleStructStore2:
+ case NEON_LD2R:
+ offset = (offset / vt.LaneCount()) * 2;
+ break;
- brk(high16);
- brk(low16);
+ case NEON_LD1_3v:
+ case NEON_ST1_3v:
+ case NEON_LD3:
+ case NEON_ST3:
+ offset *= 3;
+ break;
+ case NEONLoadStoreSingleStructLoad3:
+ case NEONLoadStoreSingleStructStore3:
+ case NEON_LD3R:
+ offset = (offset / vt.LaneCount()) * 3;
+ break;
+
+ case NEON_LD1_4v:
+ case NEON_ST1_4v:
+ case NEON_LD4:
+ case NEON_ST4:
+ offset *= 4;
+ break;
+ case NEONLoadStoreSingleStructLoad4:
+ case NEONLoadStoreSingleStructStore4:
+ case NEON_LD4R:
+ offset = (offset / vt.LaneCount()) * 4;
+ break;
+ default:
+ UNREACHABLE();
+ }
+ DCHECK(!addr.regoffset().Is(NoReg) || addr.offset() == offset);
}
+#else
+ USE(vt);
+ USE(addr);
+ USE(op);
+#endif
}
+void Assembler::LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
+ NEONLoadStoreMultiStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+ DCHECK(vt.IsVector() || vt.Is1D());
+ Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
+}
-// Note:
-// Below, a difference in case for the same letter indicates a
-// negated bit.
-// If b is 1, then B is 0.
-Instr Assembler::ImmFP32(float imm) {
- DCHECK(IsImmFP32(imm));
- // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
- uint32_t bits = float_to_rawbits(imm);
- // bit7: a000.0000
- uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
- // bit6: 0b00.0000
- uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
- // bit5_to_0: 00cd.efgh
- uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
-
- return (bit7 | bit6 | bit5_to_0) << ImmFP_offset;
+void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+ Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
}
+void Assembler::ld1(const VRegister& vt, const MemOperand& src) {
+ LoadStoreStruct(vt, src, NEON_LD1_1v);
+}
-Instr Assembler::ImmFP64(double imm) {
- DCHECK(IsImmFP64(imm));
- // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
- // 0000.0000.0000.0000.0000.0000.0000.0000
- uint64_t bits = double_to_rawbits(imm);
- // bit7: a000.0000
- uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
- // bit6: 0b00.0000
- uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
- // bit5_to_0: 00cd.efgh
- uint64_t bit5_to_0 = (bits >> 48) & 0x3f;
+void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ DCHECK(AreSameFormat(vt, vt2));
+ DCHECK(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_LD1_2v);
+}
- return static_cast<Instr>((bit7 | bit6 | bit5_to_0) << ImmFP_offset);
+void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ DCHECK(AreSameFormat(vt, vt2, vt3));
+ DCHECK(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_LD1_3v);
}
+void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ USE(vt4);
+ DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
+ DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_LD1_4v);
+}
-// Code generation helpers.
-void Assembler::MoveWide(const Register& rd,
- uint64_t imm,
- int shift,
- MoveWideImmediateOp mov_op) {
- // Ignore the top 32 bits of an immediate if we're moving to a W register.
- if (rd.Is32Bits()) {
- // Check that the top 32 bits are zero (a positive 32-bit number) or top
- // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
- DCHECK(((imm >> kWRegSizeInBits) == 0) ||
- ((imm >> (kWRegSizeInBits - 1)) == 0x1ffffffff));
- imm &= kWRegMask;
- }
+void Assembler::ld2(const VRegister& vt, const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ DCHECK(AreSameFormat(vt, vt2));
+ DCHECK(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_LD2);
+}
+
+void Assembler::ld2(const VRegister& vt, const VRegister& vt2, int lane,
+ const MemOperand& src) {
+ USE(vt2);
+ DCHECK(AreSameFormat(vt, vt2));
+ DCHECK(AreConsecutive(vt, vt2));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2);
+}
+
+void Assembler::ld2r(const VRegister& vt, const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ DCHECK(AreSameFormat(vt, vt2));
+ DCHECK(AreConsecutive(vt, vt2));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R);
+}
+
+void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ DCHECK(AreSameFormat(vt, vt2, vt3));
+ DCHECK(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_LD3);
+}
+
+void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, int lane, const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ DCHECK(AreSameFormat(vt, vt2, vt3));
+ DCHECK(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3);
+}
+
+void Assembler::ld3r(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ DCHECK(AreSameFormat(vt, vt2, vt3));
+ DCHECK(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R);
+}
+
+void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ USE(vt4);
+ DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
+ DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_LD4);
+}
+
+void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const VRegister& vt4, int lane,
+ const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ USE(vt4);
+ DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
+ DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4);
+}
+
+void Assembler::ld4r(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ USE(vt4);
+ DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
+ DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R);
+}
+
+void Assembler::st1(const VRegister& vt, const MemOperand& src) {
+ LoadStoreStruct(vt, src, NEON_ST1_1v);
+}
+
+void Assembler::st1(const VRegister& vt, const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ DCHECK(AreSameFormat(vt, vt2));
+ DCHECK(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_ST1_2v);
+}
+
+void Assembler::st1(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ DCHECK(AreSameFormat(vt, vt2, vt3));
+ DCHECK(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_ST1_3v);
+}
+
+void Assembler::st1(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2);
+ USE(vt3);
+ USE(vt4);
+ DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
+ DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_ST1_4v);
+}
+
+void Assembler::st2(const VRegister& vt, const VRegister& vt2,
+ const MemOperand& dst) {
+ USE(vt2);
+ DCHECK(AreSameFormat(vt, vt2));
+ DCHECK(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, dst, NEON_ST2);
+}
+
+void Assembler::st2(const VRegister& vt, const VRegister& vt2, int lane,
+ const MemOperand& dst) {
+ USE(vt2);
+ DCHECK(AreSameFormat(vt, vt2));
+ DCHECK(AreConsecutive(vt, vt2));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2);
+}
+
+void Assembler::st3(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const MemOperand& dst) {
+ USE(vt2);
+ USE(vt3);
+ DCHECK(AreSameFormat(vt, vt2, vt3));
+ DCHECK(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, dst, NEON_ST3);
+}
+
+void Assembler::st3(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, int lane, const MemOperand& dst) {
+ USE(vt2);
+ USE(vt3);
+ DCHECK(AreSameFormat(vt, vt2, vt3));
+ DCHECK(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3);
+}
+
+void Assembler::st4(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const VRegister& vt4,
+ const MemOperand& dst) {
+ USE(vt2);
+ USE(vt3);
+ USE(vt4);
+ DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
+ DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, dst, NEON_ST4);
+}
+
+void Assembler::st4(const VRegister& vt, const VRegister& vt2,
+ const VRegister& vt3, const VRegister& vt4, int lane,
+ const MemOperand& dst) {
+ USE(vt2);
+ USE(vt3);
+ USE(vt4);
+ DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
+ DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4);
+}
+
+void Assembler::LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+
+ // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ unsigned lane_size = vt.LaneSizeInBytes();
+ DCHECK_LT(lane, kQRegSize / lane_size);
+
+ // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
+ // S and size fields.
+ lane *= lane_size;
+
+ // Encodings for S[0]/D[0] and S[2]/D[1] are distinguished using the least-
+ // significant bit of the size field, so we increment lane here to account for
+ // that.
+ if (lane_size == 8) lane++;
+
+ Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask;
+ Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask;
+ Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask;
+
+ Instr instr = op;
+ switch (lane_size) {
+ case 1:
+ instr |= NEONLoadStoreSingle_b;
+ break;
+ case 2:
+ instr |= NEONLoadStoreSingle_h;
+ break;
+ case 4:
+ instr |= NEONLoadStoreSingle_s;
+ break;
+ default:
+ DCHECK_EQ(lane_size, 8U);
+ instr |= NEONLoadStoreSingle_d;
+ }
+
+ Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt));
+}
+
+void Assembler::ld1(const VRegister& vt, int lane, const MemOperand& src) {
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1);
+}
+
+void Assembler::ld1r(const VRegister& vt, const MemOperand& src) {
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R);
+}
+
+void Assembler::st1(const VRegister& vt, int lane, const MemOperand& dst) {
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
+}
+
+void Assembler::dmb(BarrierDomain domain, BarrierType type) {
+ Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+}
+
+void Assembler::dsb(BarrierDomain domain, BarrierType type) {
+ Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+}
+
+void Assembler::isb() {
+ Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
+}
+
+void Assembler::fmov(const VRegister& vd, double imm) {
+ if (vd.IsScalar()) {
+ DCHECK(vd.Is1D());
+ Emit(FMOV_d_imm | Rd(vd) | ImmFP(imm));
+ } else {
+ DCHECK(vd.Is2D());
+ Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit;
+ Emit(NEON_Q | op | ImmNEONFP(imm) | NEONCmode(0xf) | Rd(vd));
+ }
+}
+
+void Assembler::fmov(const VRegister& vd, float imm) {
+ if (vd.IsScalar()) {
+ DCHECK(vd.Is1S());
+ Emit(FMOV_s_imm | Rd(vd) | ImmFP(imm));
+ } else {
+ DCHECK(vd.Is2S() | vd.Is4S());
+ Instr op = NEONModifiedImmediate_MOVI;
+ Instr q = vd.Is4S() ? NEON_Q : 0;
+ Emit(q | op | ImmNEONFP(imm) | NEONCmode(0xf) | Rd(vd));
+ }
+}
+
+void Assembler::fmov(const Register& rd, const VRegister& fn) {
+ DCHECK_EQ(rd.SizeInBits(), fn.SizeInBits());
+ FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
+ Emit(op | Rd(rd) | Rn(fn));
+}
+
+void Assembler::fmov(const VRegister& vd, const Register& rn) {
+ DCHECK_EQ(vd.SizeInBits(), rn.SizeInBits());
+ FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx;
+ Emit(op | Rd(vd) | Rn(rn));
+}
+
+void Assembler::fmov(const VRegister& vd, const VRegister& vn) {
+ DCHECK_EQ(vd.SizeInBits(), vn.SizeInBits());
+ Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn));
+}
+
+void Assembler::fmov(const VRegister& vd, int index, const Register& rn) {
+ DCHECK((index == 1) && vd.Is1D() && rn.IsX());
+ USE(index);
+ Emit(FMOV_d1_x | Rd(vd) | Rn(rn));
+}
+
+void Assembler::fmov(const Register& rd, const VRegister& vn, int index) {
+ DCHECK((index == 1) && vn.Is1D() && rd.IsX());
+ USE(index);
+ Emit(FMOV_x_d1 | Rd(rd) | Rn(vn));
+}
+
+void Assembler::fmadd(const VRegister& fd, const VRegister& fn,
+ const VRegister& fm, const VRegister& fa) {
+ FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMADD_s : FMADD_d);
+}
+
+void Assembler::fmsub(const VRegister& fd, const VRegister& fn,
+ const VRegister& fm, const VRegister& fa) {
+ FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMSUB_s : FMSUB_d);
+}
+
+void Assembler::fnmadd(const VRegister& fd, const VRegister& fn,
+ const VRegister& fm, const VRegister& fa) {
+ FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMADD_s : FNMADD_d);
+}
+
+void Assembler::fnmsub(const VRegister& fd, const VRegister& fn,
+ const VRegister& fm, const VRegister& fa) {
+ FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMSUB_s : FNMSUB_d);
+}
+
+void Assembler::fnmul(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ DCHECK(AreSameSizeAndType(vd, vn, vm));
+ Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
+ Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcmp(const VRegister& fn, const VRegister& fm) {
+ DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
+ Emit(FPType(fn) | FCMP | Rm(fm) | Rn(fn));
+}
+
+void Assembler::fcmp(const VRegister& fn, double value) {
+ USE(value);
+ // Although the fcmp instruction can strictly only take an immediate value of
+ // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
+ // affect the result of the comparison.
+ DCHECK_EQ(value, 0.0);
+ Emit(FPType(fn) | FCMP_zero | Rn(fn));
+}
+
+void Assembler::fccmp(const VRegister& fn, const VRegister& fm,
+ StatusFlags nzcv, Condition cond) {
+ DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
+ Emit(FPType(fn) | FCCMP | Rm(fm) | Cond(cond) | Rn(fn) | Nzcv(nzcv));
+}
+
+void Assembler::fcsel(const VRegister& fd, const VRegister& fn,
+ const VRegister& fm, Condition cond) {
+ DCHECK_EQ(fd.SizeInBits(), fn.SizeInBits());
+ DCHECK_EQ(fd.SizeInBits(), fm.SizeInBits());
+ Emit(FPType(fd) | FCSEL | Rm(fm) | Cond(cond) | Rn(fn) | Rd(fd));
+}
+
+void Assembler::NEONFPConvertToInt(const Register& rd, const VRegister& vn,
+ Instr op) {
+ Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd));
+}
+
+void Assembler::NEONFPConvertToInt(const VRegister& vd, const VRegister& vn,
+ Instr op) {
+ if (vn.IsScalar()) {
+ DCHECK((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D()));
+ op |= NEON_Q | NEONScalar;
+ }
+ Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcvt(const VRegister& vd, const VRegister& vn) {
+ FPDataProcessing1SourceOp op;
+ if (vd.Is1D()) {
+ DCHECK(vn.Is1S() || vn.Is1H());
+ op = vn.Is1S() ? FCVT_ds : FCVT_dh;
+ } else if (vd.Is1S()) {
+ DCHECK(vn.Is1D() || vn.Is1H());
+ op = vn.Is1D() ? FCVT_sd : FCVT_sh;
+ } else {
+ DCHECK(vd.Is1H());
+ DCHECK(vn.Is1D() || vn.Is1S());
+ op = vn.Is1D() ? FCVT_hd : FCVT_hs;
+ }
+ FPDataProcessing1Source(vd, vn, op);
+}
+
+void Assembler::fcvtl(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is4S() && vn.Is4H()) || (vd.Is2D() && vn.Is2S()));
+ Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcvtl2(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is4S() && vn.Is8H()) || (vd.Is2D() && vn.Is4S()));
+ Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcvtn(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vn.Is4S() && vd.Is4H()) || (vn.Is2D() && vd.Is2S()));
+ Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcvtn2(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vn.Is4S() && vd.Is8H()) || (vn.Is2D() && vd.Is4S()));
+ Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcvtxn(const VRegister& vd, const VRegister& vn) {
+ Instr format = 1 << NEONSize_offset;
+ if (vd.IsScalar()) {
+ DCHECK(vd.Is1S() && vn.Is1D());
+ Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd));
+ } else {
+ DCHECK(vd.Is2S() && vn.Is2D());
+ Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd));
+ }
+}
+
+void Assembler::fcvtxn2(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.Is4S() && vn.Is2D());
+ Instr format = 1 << NEONSize_offset;
+ Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
+}
+
+#define NEON_FP2REGMISC_FCVT_LIST(V) \
+ V(fcvtnu, NEON_FCVTNU, FCVTNU) \
+ V(fcvtns, NEON_FCVTNS, FCVTNS) \
+ V(fcvtpu, NEON_FCVTPU, FCVTPU) \
+ V(fcvtps, NEON_FCVTPS, FCVTPS) \
+ V(fcvtmu, NEON_FCVTMU, FCVTMU) \
+ V(fcvtms, NEON_FCVTMS, FCVTMS) \
+ V(fcvtau, NEON_FCVTAU, FCVTAU) \
+ V(fcvtas, NEON_FCVTAS, FCVTAS)
+
+#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
+ void Assembler::FN(const Register& rd, const VRegister& vn) { \
+ NEONFPConvertToInt(rd, vn, SCA_OP); \
+ } \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
+ NEONFPConvertToInt(vd, vn, VEC_OP); \
+ }
+NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
+#undef DEFINE_ASM_FUNCS
+
+void Assembler::scvtf(const VRegister& vd, const VRegister& vn, int fbits) {
+ DCHECK_GE(fbits, 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_SCVTF);
+ } else {
+ DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
+ }
+}
+
+void Assembler::ucvtf(const VRegister& vd, const VRegister& vn, int fbits) {
+ DCHECK_GE(fbits, 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_UCVTF);
+ } else {
+ DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
+ }
+}
+
+void Assembler::scvtf(const VRegister& vd, const Register& rn, int fbits) {
+ DCHECK_GE(fbits, 0);
+ if (fbits == 0) {
+ Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
+ } else {
+ Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
+ Rd(vd));
+ }
+}
+
+void Assembler::ucvtf(const VRegister& fd, const Register& rn, int fbits) {
+ DCHECK_GE(fbits, 0);
+ if (fbits == 0) {
+ Emit(SF(rn) | FPType(fd) | UCVTF | Rn(rn) | Rd(fd));
+ } else {
+ Emit(SF(rn) | FPType(fd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
+ Rd(fd));
+ }
+}
+
+void Assembler::NEON3Same(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, NEON3SameOp vop) {
+ DCHECK(AreSameFormat(vd, vn, vm));
+ DCHECK(vd.IsVector() || !vd.IsQ());
+
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ format = VFormat(vd);
+ }
+
+ Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEONFP3Same(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, Instr op) {
+ DCHECK(AreSameFormat(vd, vn, vm));
+ Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+#define NEON_FP2REGMISC_LIST(V) \
+ V(fabs, NEON_FABS, FABS) \
+ V(fneg, NEON_FNEG, FNEG) \
+ V(fsqrt, NEON_FSQRT, FSQRT) \
+ V(frintn, NEON_FRINTN, FRINTN) \
+ V(frinta, NEON_FRINTA, FRINTA) \
+ V(frintp, NEON_FRINTP, FRINTP) \
+ V(frintm, NEON_FRINTM, FRINTM) \
+ V(frintx, NEON_FRINTX, FRINTX) \
+ V(frintz, NEON_FRINTZ, FRINTZ) \
+ V(frinti, NEON_FRINTI, FRINTI) \
+ V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
+ V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar)
+
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
+ Instr op; \
+ if (vd.IsScalar()) { \
+ DCHECK(vd.Is1S() || vd.Is1D()); \
+ op = SCA_OP; \
+ } else { \
+ DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
+ op = VEC_OP; \
+ } \
+ NEONFP2RegMisc(vd, vn, op); \
+ }
+NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::shll(const VRegister& vd, const VRegister& vn, int shift) {
+ DCHECK((vd.Is8H() && vn.Is8B() && shift == 8) ||
+ (vd.Is4S() && vn.Is4H() && shift == 16) ||
+ (vd.Is2D() && vn.Is2S() && shift == 32));
+ USE(shift);
+ Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
+}
+
+void Assembler::shll2(const VRegister& vd, const VRegister& vn, int shift) {
+ USE(shift);
+ DCHECK((vd.Is8H() && vn.Is16B() && shift == 8) ||
+ (vd.Is4S() && vn.Is8H() && shift == 16) ||
+ (vd.Is2D() && vn.Is4S() && shift == 32));
+ Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
+ NEON2RegMiscOp vop, double value) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK_EQ(value, 0.0);
+ USE(value);
+
+ Instr op = vop;
+ if (vd.IsScalar()) {
+ DCHECK(vd.Is1S() || vd.Is1D());
+ op |= NEON_Q | NEONScalar;
+ } else {
+ DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S());
+ }
+
+ Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcmeq(const VRegister& vd, const VRegister& vn, double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
+}
+
+void Assembler::fcmge(const VRegister& vd, const VRegister& vn, double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
+}
+
+void Assembler::fcmgt(const VRegister& vd, const VRegister& vn, double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
+}
+
+void Assembler::fcmle(const VRegister& vd, const VRegister& vn, double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
+}
+
+void Assembler::fcmlt(const VRegister& vd, const VRegister& vn, double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
+}
+
+void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsScalar());
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is1S() || vd.Is1D());
+ Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
+ DCHECK(vn.Is1S() || vn.Is1D());
+ DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
+ if (fbits == 0) {
+ Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
+ } else {
+ Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) |
+ Rd(rd));
+ }
+}
+
+void Assembler::fcvtzs(const VRegister& vd, const VRegister& vn, int fbits) {
+ DCHECK_GE(fbits, 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
+ } else {
+ DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
+ }
+}
+
+void Assembler::fcvtzu(const Register& rd, const VRegister& vn, int fbits) {
+ DCHECK(vn.Is1S() || vn.Is1D());
+ DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
+ if (fbits == 0) {
+ Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
+ } else {
+ Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) |
+ Rd(rd));
+ }
+}
+
+void Assembler::fcvtzu(const VRegister& vd, const VRegister& vn, int fbits) {
+ DCHECK_GE(fbits, 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
+ } else {
+ DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
+ }
+}
+
+void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
+ Instr op) {
+ DCHECK(AreSameFormat(vd, vn));
+ Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEON2RegMisc(const VRegister& vd, const VRegister& vn,
+ NEON2RegMiscOp vop, int value) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK_EQ(value, 0);
+ USE(value);
+
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ format = VFormat(vd);
+ }
+
+ Emit(format | op | Rn(vn) | Rd(vd));
+}
+
+void Assembler::cmeq(const VRegister& vd, const VRegister& vn, int value) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value);
+}
+
+void Assembler::cmge(const VRegister& vd, const VRegister& vn, int value) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMGE_zero, value);
+}
+
+void Assembler::cmgt(const VRegister& vd, const VRegister& vn, int value) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMGT_zero, value);
+}
+
+void Assembler::cmle(const VRegister& vd, const VRegister& vn, int value) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMLE_zero, value);
+}
+
+void Assembler::cmlt(const VRegister& vd, const VRegister& vn, int value) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMLT_zero, value);
+}
+
+#define NEON_3SAME_LIST(V) \
+ V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \
+ V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \
+ V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \
+ V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \
+ V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \
+ V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D()) \
+ V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \
+ V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \
+ V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \
+ V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \
+ V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \
+ V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \
+ V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \
+ V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
+ V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
+ V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \
+ V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \
+ V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \
+ V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \
+ V(bic, NEON_BIC, vd.Is8B() || vd.Is16B()) \
+ V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \
+ V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \
+ V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \
+ V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \
+ V(uqadd, NEON_UQADD, true) \
+ V(sqadd, NEON_SQADD, true) \
+ V(uqsub, NEON_UQSUB, true) \
+ V(sqsub, NEON_SQSUB, true) \
+ V(sqshl, NEON_SQSHL, true) \
+ V(uqshl, NEON_UQSHL, true) \
+ V(sqrshl, NEON_SQRSHL, true) \
+ V(uqrshl, NEON_UQRSHL, true)
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn, \
+ const VRegister& vm) { \
+ DCHECK(AS); \
+ NEON3Same(vd, vn, vm, OP); \
+ }
+NEON_3SAME_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+#define NEON_FP3SAME_LIST(V) \
+ V(fadd, NEON_FADD, FADD) \
+ V(fsub, NEON_FSUB, FSUB) \
+ V(fmul, NEON_FMUL, FMUL) \
+ V(fdiv, NEON_FDIV, FDIV) \
+ V(fmax, NEON_FMAX, FMAX) \
+ V(fmaxnm, NEON_FMAXNM, FMAXNM) \
+ V(fmin, NEON_FMIN, FMIN) \
+ V(fminnm, NEON_FMINNM, FMINNM) \
+ V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \
+ V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \
+ V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
+ V(fabd, NEON_FABD, NEON_FABD_scalar) \
+ V(fmla, NEON_FMLA, 0) \
+ V(fmls, NEON_FMLS, 0) \
+ V(facge, NEON_FACGE, NEON_FACGE_scalar) \
+ V(facgt, NEON_FACGT, NEON_FACGT_scalar) \
+ V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \
+ V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \
+ V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \
+ V(faddp, NEON_FADDP, 0) \
+ V(fmaxp, NEON_FMAXP, 0) \
+ V(fminp, NEON_FMINP, 0) \
+ V(fmaxnmp, NEON_FMAXNMP, 0) \
+ V(fminnmp, NEON_FMINNMP, 0)
+
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn, \
+ const VRegister& vm) { \
+ Instr op; \
+ if ((SCA_OP != 0) && vd.IsScalar()) { \
+ DCHECK(vd.Is1S() || vd.Is1D()); \
+ op = SCA_OP; \
+ } else { \
+ DCHECK(vd.IsVector()); \
+ DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
+ op = VEC_OP; \
+ } \
+ NEONFP3Same(vd, vn, vm, op); \
+ }
+NEON_FP3SAME_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::addp(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is1D() && vn.Is2D()));
+ Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd));
+}
+
+void Assembler::faddp(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fmaxp(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fminp(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fmaxnmp(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fminnmp(const VRegister& vd, const VRegister& vn) {
+ DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
+}
+
+void Assembler::orr(const VRegister& vd, const int imm8, const int left_shift) {
+ NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_ORR);
+}
+
+void Assembler::mov(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ if (vd.IsD()) {
+ orr(vd.V8B(), vn.V8B(), vn.V8B());
+ } else {
+ DCHECK(vd.IsQ());
+ orr(vd.V16B(), vn.V16B(), vn.V16B());
+ }
+}
+
+void Assembler::bic(const VRegister& vd, const int imm8, const int left_shift) {
+ NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_BIC);
+}
+
+void Assembler::movi(const VRegister& vd, const uint64_t imm, Shift shift,
+ const int shift_amount) {
+ DCHECK((shift == LSL) || (shift == MSL));
+ if (vd.Is2D() || vd.Is1D()) {
+ DCHECK_EQ(shift_amount, 0);
+ int imm8 = 0;
+ for (int i = 0; i < 8; ++i) {
+ int byte = (imm >> (i * 8)) & 0xff;
+ DCHECK((byte == 0) || (byte == 0xff));
+ if (byte == 0xff) {
+ imm8 |= (1 << i);
+ }
+ }
+ Instr q = vd.Is2D() ? NEON_Q : 0;
+ Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI |
+ ImmNEONabcdefgh(imm8) | NEONCmode(0xe) | Rd(vd));
+ } else if (shift == LSL) {
+ NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount,
+ NEONModifiedImmediate_MOVI);
+ } else {
+ NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount,
+ NEONModifiedImmediate_MOVI);
+ }
+}
+
+void Assembler::mvn(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ if (vd.IsD()) {
+ not_(vd.V8B(), vn.V8B());
+ } else {
+ DCHECK(vd.IsQ());
+ not_(vd.V16B(), vn.V16B());
+ }
+}
+
+void Assembler::mvni(const VRegister& vd, const int imm8, Shift shift,
+ const int shift_amount) {
+ DCHECK((shift == LSL) || (shift == MSL));
+ if (shift == LSL) {
+ NEONModifiedImmShiftLsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
+ } else {
+ NEONModifiedImmShiftMsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
+ }
+}
+
+void Assembler::NEONFPByElement(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, int vm_index,
+ NEONByIndexedElementOp vop) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK((vd.Is2S() && vm.Is1S()) || (vd.Is4S() && vm.Is1S()) ||
+ (vd.Is1S() && vm.Is1S()) || (vd.Is2D() && vm.Is1D()) ||
+ (vd.Is1D() && vm.Is1D()));
+ DCHECK((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)));
+
+ Instr op = vop;
+ int index_num_bits = vm.Is1S() ? 2 : 1;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ }
+
+ Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) |
+ Rn(vn) | Rd(vd));
+}
+
+void Assembler::NEONByElement(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, int vm_index,
+ NEONByIndexedElementOp vop) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK((vd.Is4H() && vm.Is1H()) || (vd.Is8H() && vm.Is1H()) ||
+ (vd.Is1H() && vm.Is1H()) || (vd.Is2S() && vm.Is1S()) ||
+ (vd.Is4S() && vm.Is1S()) || (vd.Is1S() && vm.Is1S()));
+ DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
+ (vm.Is1S() && (vm_index < 4)));
+
+ Instr format, op = vop;
+ int index_num_bits = vm.Is1H() ? 3 : 2;
+ if (vd.IsScalar()) {
+ op |= NEONScalar | NEON_Q;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
+ Rd(vd));
+}
+
+void Assembler::NEONByElementL(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, int vm_index,
+ NEONByIndexedElementOp vop) {
+ DCHECK((vd.Is4S() && vn.Is4H() && vm.Is1H()) ||
+ (vd.Is4S() && vn.Is8H() && vm.Is1H()) ||
+ (vd.Is1S() && vn.Is1H() && vm.Is1H()) ||
+ (vd.Is2D() && vn.Is2S() && vm.Is1S()) ||
+ (vd.Is2D() && vn.Is4S() && vm.Is1S()) ||
+ (vd.Is1D() && vn.Is1S() && vm.Is1S()));
+
+ DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
+ (vm.Is1S() && (vm_index < 4)));
+
+ Instr format, op = vop;
+ int index_num_bits = vm.Is1H() ? 3 : 2;
+ if (vd.IsScalar()) {
+ op |= NEONScalar | NEON_Q;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
+ Rd(vd));
+}
+
+#define NEON_BYELEMENT_LIST(V) \
+ V(mul, NEON_MUL_byelement, vn.IsVector()) \
+ V(mla, NEON_MLA_byelement, vn.IsVector()) \
+ V(mls, NEON_MLS_byelement, vn.IsVector()) \
+ V(sqdmulh, NEON_SQDMULH_byelement, true) \
+ V(sqrdmulh, NEON_SQRDMULH_byelement, true)
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn, \
+ const VRegister& vm, int vm_index) { \
+ DCHECK(AS); \
+ NEONByElement(vd, vn, vm, vm_index, OP); \
+ }
+NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+#define NEON_FPBYELEMENT_LIST(V) \
+ V(fmul, NEON_FMUL_byelement) \
+ V(fmla, NEON_FMLA_byelement) \
+ V(fmls, NEON_FMLS_byelement) \
+ V(fmulx, NEON_FMULX_byelement)
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn, \
+ const VRegister& vm, int vm_index) { \
+ NEONFPByElement(vd, vn, vm, vm_index, OP); \
+ }
+NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+#define NEON_BYELEMENT_LONG_LIST(V) \
+ V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ())
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn, \
+ const VRegister& vm, int vm_index) { \
+ DCHECK(AS); \
+ NEONByElementL(vd, vn, vm, vm_index, OP); \
+ }
+NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SUQADD);
+}
+
+void Assembler::usqadd(const VRegister& vd, const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_USQADD);
+}
+
+void Assembler::abs(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_ABS);
+}
+
+void Assembler::sqabs(const VRegister& vd, const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SQABS);
+}
+
+void Assembler::neg(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_NEG);
+}
+
+void Assembler::sqneg(const VRegister& vd, const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SQNEG);
+}
+
+void Assembler::NEONXtn(const VRegister& vd, const VRegister& vn,
+ NEON2RegMiscOp vop) {
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
+ (vd.Is1S() && vn.Is1D()));
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
+ (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
+ format = VFormat(vd);
+ }
+ Emit(format | op | Rn(vn) | Rd(vd));
+}
+
+void Assembler::xtn(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsVector() && vd.IsD());
+ NEONXtn(vd, vn, NEON_XTN);
+}
+
+void Assembler::xtn2(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_XTN);
+}
+
+void Assembler::sqxtn(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_SQXTN);
+}
+
+void Assembler::sqxtn2(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_SQXTN);
+}
+
+void Assembler::sqxtun(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_SQXTUN);
+}
+
+void Assembler::sqxtun2(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_SQXTUN);
+}
+
+void Assembler::uqxtn(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_UQXTN);
+}
+
+void Assembler::uqxtn2(const VRegister& vd, const VRegister& vn) {
+ DCHECK(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_UQXTN);
+}
+
+// NEON NOT and RBIT are distinguised by bit 22, the bottom bit of "size".
+void Assembler::not_(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
+}
+
+void Assembler::rbit(const VRegister& vd, const VRegister& vn) {
+ DCHECK(AreSameFormat(vd, vn));
+ DCHECK(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
+}
+
+void Assembler::ext(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm, int index) {
+ DCHECK(AreSameFormat(vd, vn, vm));
+ DCHECK(vd.Is8B() || vd.Is16B());
+ DCHECK((0 <= index) && (index < vd.LaneCount()));
+ Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::dup(const VRegister& vd, const VRegister& vn, int vn_index) {
+ Instr q, scalar;
+
+ // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1:
+ format = NEON_16B;
+ break;
+ case 2:
+ format = NEON_8H;
+ break;
+ case 4:
+ format = NEON_4S;
+ break;
+ default:
+ DCHECK_EQ(lane_size, 8);
+ format = NEON_2D;
+ break;
+ }
+
+ if (vd.IsScalar()) {
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ DCHECK(!vd.Is1D());
+ q = vd.IsD() ? 0 : NEON_Q;
+ scalar = 0;
+ }
+ Emit(q | scalar | NEON_DUP_ELEMENT | ImmNEON5(format, vn_index) | Rn(vn) |
+ Rd(vd));
+}
+
+void Assembler::dcptr(Label* label) {
+ RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
+ if (label->is_bound()) {
+ // The label is bound, so it does not need to be updated and the internal
+ // reference should be emitted.
+ //
+ // In this case, label->pos() returns the offset of the label from the
+ // start of the buffer.
+ internal_reference_positions_.push_back(pc_offset());
+ dc64(reinterpret_cast<uintptr_t>(buffer_ + label->pos()));
+ } else {
+ int32_t offset;
+ if (label->is_linked()) {
+ // The label is linked, so the internal reference should be added
+ // onto the end of the label's link chain.
+ //
+ // In this case, label->pos() returns the offset of the last linked
+ // instruction from the start of the buffer.
+ offset = label->pos() - pc_offset();
+ DCHECK(offset != kStartOfLabelLinkChain);
+ } else {
+ // The label is unused, so it now becomes linked and the internal
+ // reference is at the start of the new link chain.
+ offset = kStartOfLabelLinkChain;
+ }
+ // The instruction at pc is now the last link in the label's chain.
+ label->link_to(pc_offset());
+
+ // Traditionally the offset to the previous instruction in the chain is
+ // encoded in the instruction payload (e.g. branch range) but internal
+ // references are not instructions so while unbound they are encoded as
+ // two consecutive brk instructions. The two 16-bit immediates are used
+ // to encode the offset.
+ offset >>= kInstructionSizeLog2;
+ DCHECK(is_int32(offset));
+ uint32_t high16 = unsigned_bitextract_32(31, 16, offset);
+ uint32_t low16 = unsigned_bitextract_32(15, 0, offset);
+
+ brk(high16);
+ brk(low16);
+ }
+}
+
+// Below, a difference in case for the same letter indicates a
+// negated bit. If b is 1, then B is 0.
+uint32_t Assembler::FPToImm8(double imm) {
+ DCHECK(IsImmFP64(imm));
+ // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = bit_cast<uint64_t>(imm);
+ // bit7: a000.0000
+ uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint64_t bit5_to_0 = (bits >> 48) & 0x3f;
+
+ return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
+}
+
+Instr Assembler::ImmFP(double imm) { return FPToImm8(imm) << ImmFP_offset; }
+Instr Assembler::ImmNEONFP(double imm) {
+ return ImmNEONabcdefgh(FPToImm8(imm));
+}
+
+// Code generation helpers.
+void Assembler::MoveWide(const Register& rd, uint64_t imm, int shift,
+ MoveWideImmediateOp mov_op) {
+ // Ignore the top 32 bits of an immediate if we're moving to a W register.
+ if (rd.Is32Bits()) {
+ // Check that the top 32 bits are zero (a positive 32-bit number) or top
+ // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
+ DCHECK(((imm >> kWRegSizeInBits) == 0) ||
+ ((imm >> (kWRegSizeInBits - 1)) == 0x1ffffffff));
+ imm &= kWRegMask;
+ }
if (shift >= 0) {
// Explicit shift specified.
@@ -2245,13 +3993,9 @@ void Assembler::MoveWide(const Register& rd,
ImmMoveWide(static_cast<int>(imm)) | ShiftMoveWide(shift));
}
-
-void Assembler::AddSub(const Register& rd,
- const Register& rn,
- const Operand& operand,
- FlagsUpdate S,
- AddSubOp op) {
- DCHECK(rd.SizeInBits() == rn.SizeInBits());
+void Assembler::AddSub(const Register& rd, const Register& rn,
+ const Operand& operand, FlagsUpdate S, AddSubOp op) {
+ DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
DCHECK(!operand.NeedsRelocation(this));
if (operand.IsImmediate()) {
int64_t immediate = operand.ImmediateValue();
@@ -2260,8 +4004,8 @@ void Assembler::AddSub(const Register& rd,
Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) |
ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn));
} else if (operand.IsShiftedRegister()) {
- DCHECK(operand.reg().SizeInBits() == rd.SizeInBits());
- DCHECK(operand.shift() != ROR);
+ DCHECK_EQ(operand.reg().SizeInBits(), rd.SizeInBits());
+ DCHECK_NE(operand.shift(), ROR);
// For instructions of the form:
// add/sub wsp, <Wn>, <Wm> [, LSL #0-3 ]
@@ -2283,39 +4027,34 @@ void Assembler::AddSub(const Register& rd,
}
}
-
-void Assembler::AddSubWithCarry(const Register& rd,
- const Register& rn,
- const Operand& operand,
- FlagsUpdate S,
+void Assembler::AddSubWithCarry(const Register& rd, const Register& rn,
+ const Operand& operand, FlagsUpdate S,
AddSubWithCarryOp op) {
- DCHECK(rd.SizeInBits() == rn.SizeInBits());
- DCHECK(rd.SizeInBits() == operand.reg().SizeInBits());
+ DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
+ DCHECK_EQ(rd.SizeInBits(), operand.reg().SizeInBits());
DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
DCHECK(!operand.NeedsRelocation(this));
Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd));
}
-
void Assembler::hlt(int code) {
DCHECK(is_uint16(code));
Emit(HLT | ImmException(code));
}
-
void Assembler::brk(int code) {
DCHECK(is_uint16(code));
Emit(BRK | ImmException(code));
}
-
void Assembler::EmitStringData(const char* string) {
size_t len = strlen(string) + 1;
- DCHECK(RoundUp(len, kInstructionSize) <= static_cast<size_t>(kGap));
+ DCHECK_LE(RoundUp(len, kInstructionSize), static_cast<size_t>(kGap));
EmitData(string, static_cast<int>(len));
// Pad with NULL characters until pc_ is aligned.
const char pad[] = {'\0', '\0', '\0', '\0'};
- STATIC_ASSERT(sizeof(pad) == kInstructionSize);
+ static_assert(sizeof(pad) == kInstructionSize,
+ "Size of padding must match instruction size.");
EmitData(pad, RoundUp(pc_offset(), kInstructionSize) - pc_offset());
}
@@ -2432,33 +4171,75 @@ void Assembler::DataProcessing1Source(const Register& rd,
Emit(SF(rn) | op | Rn(rn) | Rd(rd));
}
-
-void Assembler::FPDataProcessing1Source(const FPRegister& fd,
- const FPRegister& fn,
+void Assembler::FPDataProcessing1Source(const VRegister& vd,
+ const VRegister& vn,
FPDataProcessing1SourceOp op) {
- Emit(FPType(fn) | op | Rn(fn) | Rd(fd));
+ Emit(FPType(vn) | op | Rn(vn) | Rd(vd));
}
-
-void Assembler::FPDataProcessing2Source(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm,
+void Assembler::FPDataProcessing2Source(const VRegister& fd,
+ const VRegister& fn,
+ const VRegister& fm,
FPDataProcessing2SourceOp op) {
DCHECK(fd.SizeInBits() == fn.SizeInBits());
DCHECK(fd.SizeInBits() == fm.SizeInBits());
Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd));
}
-
-void Assembler::FPDataProcessing3Source(const FPRegister& fd,
- const FPRegister& fn,
- const FPRegister& fm,
- const FPRegister& fa,
+void Assembler::FPDataProcessing3Source(const VRegister& fd,
+ const VRegister& fn,
+ const VRegister& fm,
+ const VRegister& fa,
FPDataProcessing3SourceOp op) {
DCHECK(AreSameSizeAndType(fd, fn, fm, fa));
Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd) | Ra(fa));
}
+void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
+ const int left_shift,
+ NEONModifiedImmediateOp op) {
+ DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() || vd.Is2S() ||
+ vd.Is4S());
+ DCHECK((left_shift == 0) || (left_shift == 8) || (left_shift == 16) ||
+ (left_shift == 24));
+ DCHECK(is_uint8(imm8));
+
+ int cmode_1, cmode_2, cmode_3;
+ if (vd.Is8B() || vd.Is16B()) {
+ DCHECK_EQ(op, NEONModifiedImmediate_MOVI);
+ cmode_1 = 1;
+ cmode_2 = 1;
+ cmode_3 = 1;
+ } else {
+ cmode_1 = (left_shift >> 3) & 1;
+ cmode_2 = left_shift >> 4;
+ cmode_3 = 0;
+ if (vd.Is4H() || vd.Is8H()) {
+ DCHECK((left_shift == 0) || (left_shift == 8));
+ cmode_3 = 1;
+ }
+ }
+ int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1);
+
+ Instr q = vd.IsQ() ? NEON_Q : 0;
+
+ Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
+}
+
+void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
+ const int shift_amount,
+ NEONModifiedImmediateOp op) {
+ DCHECK(vd.Is2S() || vd.Is4S());
+ DCHECK((shift_amount == 8) || (shift_amount == 16));
+ DCHECK(is_uint8(imm8));
+
+ int cmode_0 = (shift_amount >> 4) & 1;
+ int cmode = 0xc | cmode_0;
+
+ Instr q = vd.IsQ() ? NEON_Q : 0;
+
+ Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
+}
void Assembler::EmitShift(const Register& rd,
const Register& rn,
@@ -2558,7 +4339,7 @@ void Assembler::LoadStore(const CPURegister& rt,
Instr memop = op | Rt(rt) | RnSP(addr.base());
if (addr.IsImmediateOffset()) {
- LSDataSize size = CalcLSDataSize(op);
+ unsigned size = CalcLSDataSize(op);
if (IsImmLSScaled(addr.offset(), size)) {
int offset = static_cast<int>(addr.offset());
// Use the scaled addressing mode.
@@ -2611,14 +4392,12 @@ bool Assembler::IsImmLSUnscaled(int64_t offset) {
return is_int9(offset);
}
-
-bool Assembler::IsImmLSScaled(int64_t offset, LSDataSize size) {
+bool Assembler::IsImmLSScaled(int64_t offset, unsigned size) {
bool offset_is_size_multiple = (((offset >> size) << size) == offset);
return offset_is_size_multiple && is_uint12(offset >> size);
}
-
-bool Assembler::IsImmLSPair(int64_t offset, LSDataSize size) {
+bool Assembler::IsImmLSPair(int64_t offset, unsigned size) {
bool offset_is_size_multiple = (((offset >> size) << size) == offset);
return offset_is_size_multiple && is_int7(offset >> size);
}
@@ -2849,7 +4628,7 @@ bool Assembler::IsImmConditionalCompare(int64_t immediate) {
bool Assembler::IsImmFP32(float imm) {
// Valid values will have the form:
// aBbb.bbbc.defg.h000.0000.0000.0000.0000
- uint32_t bits = float_to_rawbits(imm);
+ uint32_t bits = bit_cast<uint32_t>(imm);
// bits[19..0] are cleared.
if ((bits & 0x7ffff) != 0) {
return false;
@@ -2874,7 +4653,7 @@ bool Assembler::IsImmFP64(double imm) {
// Valid values will have the form:
// aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
// 0000.0000.0000.0000.0000.0000.0000.0000
- uint64_t bits = double_to_rawbits(imm);
+ uint64_t bits = bit_cast<uint64_t>(imm);
// bits[47..0] are cleared.
if ((bits & 0xffffffffffffL) != 0) {
return false;
« no previous file with comments | « src/arm64/assembler-arm64.h ('k') | src/arm64/assembler-arm64-inl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698