Index: src/arm/assembler-arm.cc |
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc |
index 6e0b2db281f045c72c95561f3d3d479cd46a79aa..aa6be2110b28bb7621da4e24ef37c555a1900edb 100644 |
--- a/src/arm/assembler-arm.cc |
+++ b/src/arm/assembler-arm.cc |
@@ -483,30 +483,6 @@ void NeonMemOperand::SetAlignment(int align) { |
} |
} |
- |
-NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) { |
- base_ = base; |
- switch (registers_count) { |
- case 1: |
- type_ = nlt_1; |
- break; |
- case 2: |
- type_ = nlt_2; |
- break; |
- case 3: |
- type_ = nlt_3; |
- break; |
- case 4: |
- type_ = nlt_4; |
- break; |
- default: |
- UNREACHABLE(); |
- type_ = nlt_1; |
- break; |
- } |
-} |
- |
- |
// ----------------------------------------------------------------------------- |
// Specific instructions, constants, and masks. |
@@ -2968,7 +2944,6 @@ void Assembler::vmov(const Register dst, |
emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4); |
} |
- |
// Type of data to read from or write to VFP register. |
// Used as specifier in generic vcvt instruction. |
enum VFPType { S32, U32, F32, F64 }; |
@@ -3902,6 +3877,57 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) { |
(dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm); |
} |
+static int EncodeScalar(NeonDataType dt, int index) { |
+ int opc1_opc2 = 0; |
+ DCHECK_LE(0, index); |
+ switch (dt) { |
+ case NeonS8: |
+ case NeonU8: |
+ DCHECK_GT(8, index); |
+ opc1_opc2 = 0x8 | index; |
+ break; |
+ case NeonS16: |
+ case NeonU16: |
+ DCHECK_GT(4, index); |
+ opc1_opc2 = 0x1 | (index << 1); |
+ break; |
+ case NeonS32: |
+ case NeonU32: |
+ DCHECK_GT(2, index); |
+ opc1_opc2 = index << 2; |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
+ return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5; |
+} |
+ |
+void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index, |
+ Register src) { |
+ // Instruction details available in ARM DDI 0406C.b, A8.8.940. |
+ // vmov ARM core register to scalar. |
+ DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int opc1_opc2 = EncodeScalar(dt, index); |
+ emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 | |
+ opc1_opc2); |
+} |
+ |
+void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src, |
+ int index) { |
+ // Instruction details available in ARM DDI 0406C.b, A8.8.942. |
+ // vmov scalar to ARM core register; U = 1 is UNDEFINED for 32-bit lanes. |
+ DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); |
+ int vn, n; |
+ src.split_code(&vn, &n); |
+ int opc1_opc2 = EncodeScalar(dt, index); |
+ int u = ((dt & NeonDataTypeUMask) != 0 && dt != NeonU32) ? 1 : 0; |
+ emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | |
+ n * B7 | B4 | opc1_opc2); |
+} |
+} |
+ |
void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { |
DCHECK(IsEnabled(NEON)); |
// Instruction details available in ARM DDI 0406C.b, A8-938. |
@@ -3915,6 +3941,18 @@ void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { |
B6 | m * B5 | B4 | vm); |
} |
+void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Instruction details available in ARM DDI 0406C.b, A8-966. |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vm, m; |
+ src.split_code(&vm, &m); |
+ emit(0x1E7U * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm); |
+} |
+ |
void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { |
// Instruction details available in ARM DDI 0406C.b, A8.8.418. |
// 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | |
@@ -3940,8 +3978,105 @@ void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { |
vm); |
} |
+void Assembler::vdup(NeonSize size, const QwNeonRegister dst, |
+ const Register src) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Instruction details available in ARM DDI 0406C.b, A8-886. |
+ int B = 0, E = 0; |
+ switch (size) { |
+ case Neon8: |
+ B = 1; |
+ break; |
+ case Neon16: |
+ E = 1; |
+ break; |
+ case Neon32: |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ |
+ emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | |
+ 0xB * B8 | d * B7 | E * B5 | B4); |
+} |
+ |
+void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Instruction details available in ARM DDI 0406C.b, A8-884. |
+ int index = src.code() & 1; |
+ int d_reg = src.code() / 2; |
+ int imm4 = 4 | index << 3; // esize = 32, index in bit 3. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vm, m; |
+ DwVfpRegister::from_code(d_reg).split_code(&vm, &m); |
+ |
+ emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | |
+ B6 | m * B5 | vm); |
+} |
+ |
+// Encode NEON vcvt.src_type.dst_type instruction. |
+static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst, |
+ const VFPType src_type, const QwNeonRegister src) { |
+ DCHECK(src_type != dst_type); |
+ DCHECK(src_type == F32 || dst_type == F32); |
+ // Instruction details available in ARM DDI 0406C.b, A8.8.868. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vm, m; |
+ src.split_code(&vm, &m); |
+ |
+ int op = 0; |
+ if (src_type == F32) { |
+ DCHECK(dst_type == S32 || dst_type == U32); |
+ op = dst_type == U32 ? 3 : 2; |
+ } else { |
+ DCHECK(src_type == S32 || src_type == U32); |
+ op = src_type == U32 ? 1 : 0; |
+ } |
+ |
+ return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | |
+ B6 | m * B5 | vm; |
+} |
+ |
+void Assembler::vcvt_f32_s32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(F32, dst, S32, src)); |
+} |
+ |
+void Assembler::vcvt_f32_u32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(F32, dst, U32, src)); |
+} |
+ |
+void Assembler::vcvt_s32_f32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(S32, dst, F32, src)); |
+} |
+ |
+void Assembler::vcvt_u32_f32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(U32, dst, F32, src)); |
+} |
+ |
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
DwVfpRegister src2) { |
+ // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. |
// Instruction details available in ARM DDI 0406C.b, A8.8.888. |
DCHECK(IsEnabled(NEON)); |
int vd, d; |
@@ -3956,6 +4091,7 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
QwNeonRegister src2) { |
+ // Qd = veor(Qn, Qm) SIMD integer exclusive OR. |
// Instruction details available in ARM DDI 0406C.b, A8.8.888. |
DCHECK(IsEnabled(NEON)); |
int vd, d; |
@@ -3968,6 +4104,146 @@ void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
m * B5 | B4 | vm); |
} |
+void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vadd(Qn, Qm) SIMD floating point addition. |
+ // Instruction details available in ARM DDI 0406C.b, A8-830. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ emit(0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 | |
+ m * B5 | vm); |
+} |
+ |
+void Assembler::vadd(NeonSize size, QwNeonRegister dst, |
+ const QwNeonRegister src1, const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vadd(Qn, Qm) SIMD integer addition. |
+ // Instruction details available in ARM DDI 0406C.b, A8-828. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
+ n * B7 | B6 | m * B5 | vm); |
+} |
+ |
+void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vsub(Qn, Qm) SIMD floating point subtraction. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1086. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ emit(0x1E4U * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | |
+ B6 | m * B5 | vm); |
+} |
+ |
+void Assembler::vsub(NeonSize size, QwNeonRegister dst, |
+ const QwNeonRegister src1, const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vsub(Qn, Qm) SIMD integer subtraction. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1084. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
+ n * B7 | B6 | m * B5 | vm); |
+} |
+ |
+void Assembler::vtst(NeonSize size, QwNeonRegister dst, |
+ const QwNeonRegister src1, const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vtst(Qn, Qm) SIMD test integer operands. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1098. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
+ n * B7 | B6 | m * B5 | B4 | vm); |
+} |
+ |
+void Assembler::vceq(NeonSize size, QwNeonRegister dst, |
+ const QwNeonRegister src1, const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vceq(Qn, Qm) SIMD integer compare equal. |
+ // Instruction details available in ARM DDI 0406C.b, A8-844. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
+ n * B7 | B6 | m * B5 | B4 | vm); |
+} |
+ |
+void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
+ // Instruction details available in ARM DDI 0406C.b, A8-842 (TODO: confirm page). |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int op = 1; // vbsl |
+ emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | |
+ n * B7 | B6 | m * B5 | B4 | vm); |
+} |
+ |
+// Encode NEON vtbl / vtbx instruction. |
+static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, |
+ const DwVfpRegister index, bool vtbx) { |
+ // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
+ // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
+ // Both share one encoding; the op bit (B6) selects vtbx over vtbl. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1094. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ list.base().split_code(&vn, &n); |
+ int vm, m; |
+ index.split_code(&vm, &m); |
+ int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1. |
+ return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
+ list.length() * B8 | n * B7 | op * B6 | m * B5 | vm; |
+} |
+ |
+void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list, |
+ const DwVfpRegister index) { |
+ DCHECK(IsEnabled(NEON)); |
+ emit(EncodeNeonVTB(dst, list, index, false)); |
+} |
+ |
+void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list, |
+ const DwVfpRegister index) { |
+ DCHECK(IsEnabled(NEON)); |
+ emit(EncodeNeonVTB(dst, list, index, true)); |
+} |
+ |
// Pseudo instructions. |
void Assembler::nop(int type) { |
// ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |