Index: src/arm/assembler-arm.cc |
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc |
index 0a671c06244aefca6d2ca8997a8c35fd583281cd..da29480af353e769f8753d6c7a188c65d959a6ab 100644 |
--- a/src/arm/assembler-arm.cc |
+++ b/src/arm/assembler-arm.cc |
@@ -483,30 +483,6 @@ void NeonMemOperand::SetAlignment(int align) { |
} |
} |
- |
-NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) { |
- base_ = base; |
- switch (registers_count) { |
- case 1: |
- type_ = nlt_1; |
- break; |
- case 2: |
- type_ = nlt_2; |
- break; |
- case 3: |
- type_ = nlt_3; |
- break; |
- case 4: |
- type_ = nlt_4; |
- break; |
- default: |
- UNREACHABLE(); |
- type_ = nlt_1; |
- break; |
- } |
-} |
- |
- |
// ----------------------------------------------------------------------------- |
// Specific instructions, constants, and masks. |
@@ -2968,7 +2944,6 @@ void Assembler::vmov(const Register dst, |
emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4); |
} |
- |
// Type of data to read from or write to VFP register. |
// Used as specifier in generic vcvt instruction. |
enum VFPType { S32, U32, F32, F64 }; |
@@ -3915,6 +3890,18 @@ void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { |
B6 | m * B5 | B4 | vm); |
} |
+void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Instruction details available in ARM DDI 0406C.b, A8-966. |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vm, m; |
+ src.split_code(&vm, &m); |
+ emit(0x1E7 * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm); |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U according to issue 5725
bbudge
2016/12/10 21:33:03
Done.
|
+} |
+ |
void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { |
// Instruction details available in ARM DDI 0406C.b, A8.8.418. |
// 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | |
@@ -3940,8 +3927,105 @@ void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { |
vm); |
} |
+void Assembler::vdup(const QwNeonRegister dst, const Register src, |
+ NeonSize size) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Instruction details available in ARM DDI 0406C.b, A8-886. |
+ int B = 0, E = 0; |
+ switch (size) { |
+ case Neon8: |
+ B = 1; |
+ break; |
+ case Neon16: |
+ E = 1; |
+ break; |
+ case Neon32: |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ |
+ emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | |
+ 0xB * B8 | d * B7 | E * B5 | B4); |
+} |
+ |
+void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Instruction details available in ARM DDI 0406C.b, A8-884. |
+ int index = src.code() & 1; |
+ int d_reg = src.code() / 2; |
+ int imm4 = 4 | index << 3; // esize = 32, index in bit 3. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vm, m; |
+ DwVfpRegister::from_code(d_reg).split_code(&vm, &m); |
+ |
+ emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U
bbudge
2016/12/10 21:33:03
Done.
|
+ B6 | m * B5 | vm); |
+} |
+ |
+// Encode NEON vcvt.src_type.dst_type instruction. |
+static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst, |
+ const VFPType src_type, const QwNeonRegister src) { |
+ DCHECK(src_type != dst_type); |
+ DCHECK(src_type == F32 || dst_type == F32); |
+ DCHECK(src_type != F64 && dst_type != F64); |
+ // Instruction details available in ARM DDI 0406C.b, A8.8.868. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vm, m; |
+ src.split_code(&vm, &m); |
+ |
+ int op = 0; |
+ if (src_type == F32) { |
+ op = dst_type == U32 ? 3 : 2; |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
DCHECK((dst_type == U32) || (dst_type == S32));
bbudge
2016/12/10 21:33:03
It's (subtly) implied by the existing DCHECKs above.
|
+ } else { |
+ DCHECK_EQ(F32, dst_type); |
+ op = src_type == U32 ? 1 : 0; |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
Ditto with src_type: DCHECK((src_type == U32) || (src_type == S32));
bbudge
2016/12/10 21:33:03
Done.
|
+ } |
+ |
+ return 0x1E7u * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | |
+ B6 | m * B5 | vm; |
+} |
+ |
+void Assembler::vcvt_f32_s32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(F32, dst, S32, src)); |
+} |
+ |
+void Assembler::vcvt_f32_u32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(F32, dst, U32, src)); |
+} |
+ |
+void Assembler::vcvt_s32_f32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(S32, dst, F32, src)); |
+} |
+ |
+void Assembler::vcvt_u32_f32(const QwNeonRegister dst, |
+ const QwNeonRegister src) { |
+ DCHECK(IsEnabled(NEON)); |
+ DCHECK(VfpRegisterIsAvailable(dst)); |
+ DCHECK(VfpRegisterIsAvailable(src)); |
+ emit(EncodeNeonVCVT(U32, dst, F32, src)); |
+} |
+ |
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
DwVfpRegister src2) { |
+ // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. |
// Instruction details available in ARM DDI 0406C.b, A8.8.888. |
DCHECK(IsEnabled(NEON)); |
int vd, d; |
@@ -3956,6 +4040,7 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
QwNeonRegister src2) { |
+ // Qd = veor(Qn, Qm) SIMD integer exclusive OR. |
// Instruction details available in ARM DDI 0406C.b, A8.8.888. |
DCHECK(IsEnabled(NEON)); |
int vd, d; |
@@ -3968,6 +4053,148 @@ void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
B4 | vm); |
} |
+void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vadd(Qn, Qm) SIMD floating point addition. |
+ // Instruction details available in ARM DDI 0406C.b, A8-830. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ emit(0x1E4 * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E4U
bbudge
2016/12/10 21:33:03
Done.
|
+ m * B5 | vm); |
+} |
+ |
+void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2, NeonSize size) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vadd(Qn, Qm) SIMD integer addition. |
+ // Instruction details available in ARM DDI 0406C.b, A8-828. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
+ n * B7 | B6 | m * B5 | vm); |
+} |
+ |
+void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vsub(Qn, Qm) SIMD floating point subtraction. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1086. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ emit(0x1E4 * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
+ B6 | m * B5 | vm); |
+} |
+ |
+void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2, NeonSize size) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vsub(Qn, Qm) SIMD integer subtraction. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1084. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
+ n * B7 | B6 | m * B5 | vm); |
+} |
+ |
+void Assembler::vtst(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2, NeonSize size) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vtst(Qn, Qm) SIMD test integer operands. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1098. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
+ n * B7 | B6 | m * B5 | B4 | vm); |
+} |
+ |
+void Assembler::vceq(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2, NeonSize size) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vceq(Qn, Qm) SIMD integer compare equal. |
+ // Instruction details available in ARM DDI 0406C.b, A8-844. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int sz = static_cast<int>(size); |
+ emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:04
Done.
|
+ n * B7 | B6 | m * B5 | B4 | vm); |
+} |
+ |
+void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, |
+ const QwNeonRegister src2) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
+ // Instruction details available in ARM DDI 0406C.b, A8-844. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ src1.split_code(&vn, &n); |
+ int vm, m; |
+ src2.split_code(&vm, &m); |
+ int op = 1; // vbsl |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
Is that necessary? Why not use B20 directly below?
bbudge
2016/12/10 21:33:04
Done.
|
+ emit(0x1E6 * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E6U
bbudge
2016/12/10 21:33:03
Done.
|
+ n * B7 | B6 | m * B5 | B4 | vm); |
+} |
+ |
+void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list, |
+ const DwVfpRegister index) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1094. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ list.base().split_code(&vn, &n); |
+ int vm, m; |
+ index.split_code(&vm, &m); |
+ int op = 1; // vbsl |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
unused.
bbudge
2016/12/10 21:33:04
Done.
|
+ emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U
bbudge
2016/12/10 21:33:03
Done.
|
+ list.len() * B8 | n * B7 | m * B5 | vm); |
+} |
+ |
+void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list, |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
vtbl and vtbx encodings differ only by the value of one bit (B6 in the emit calls here) [...]
bbudge
2016/12/10 21:33:03
Yes, Done.
|
+ const DwVfpRegister index) { |
+ DCHECK(IsEnabled(NEON)); |
+ // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
+ // Instruction details available in ARM DDI 0406C.b, A8-1094. |
+ int vd, d; |
+ dst.split_code(&vd, &d); |
+ int vn, n; |
+ list.base().split_code(&vn, &n); |
+ int vm, m; |
+ index.split_code(&vm, &m); |
+ int op = 1; // vbsl |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
unused
bbudge
2016/12/10 21:33:03
Done.
|
+ emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U
bbudge
2016/12/10 21:33:04
Done.
|
+ list.len() * B8 | n * B7 | B6 | m * B5 | vm); |
+} |
+ |
// Pseudo instructions. |
void Assembler::nop(int type) { |
// ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |