Chromium Code Reviews| Index: src/arm/assembler-arm.cc |
| diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc |
| index 0a671c06244aefca6d2ca8997a8c35fd583281cd..da29480af353e769f8753d6c7a188c65d959a6ab 100644 |
| --- a/src/arm/assembler-arm.cc |
| +++ b/src/arm/assembler-arm.cc |
| @@ -483,30 +483,6 @@ void NeonMemOperand::SetAlignment(int align) { |
| } |
| } |
| - |
| -NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) { |
| - base_ = base; |
| - switch (registers_count) { |
| - case 1: |
| - type_ = nlt_1; |
| - break; |
| - case 2: |
| - type_ = nlt_2; |
| - break; |
| - case 3: |
| - type_ = nlt_3; |
| - break; |
| - case 4: |
| - type_ = nlt_4; |
| - break; |
| - default: |
| - UNREACHABLE(); |
| - type_ = nlt_1; |
| - break; |
| - } |
| -} |
| - |
| - |
| // ----------------------------------------------------------------------------- |
| // Specific instructions, constants, and masks. |
| @@ -2968,7 +2944,6 @@ void Assembler::vmov(const Register dst, |
| emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4); |
| } |
| - |
| // Type of data to read from or write to VFP register. |
| // Used as specifier in generic vcvt instruction. |
| enum VFPType { S32, U32, F32, F64 }; |
| @@ -3915,6 +3890,18 @@ void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { |
| B6 | m * B5 | B4 | vm); |
| } |
| +void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Instruction details available in ARM DDI 0406C.b, A8-966. |
| + DCHECK(VfpRegisterIsAvailable(dst)); |
| + DCHECK(VfpRegisterIsAvailable(src)); |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vm, m; |
| + src.split_code(&vm, &m); |
| + emit(0x1E7 * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm); |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U according to issue 5725
bbudge
2016/12/10 21:33:03
Done.
|
| +} |
| + |
| void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { |
| // Instruction details available in ARM DDI 0406C.b, A8.8.418. |
| // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | |
| @@ -3940,8 +3927,105 @@ void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { |
| vm); |
| } |
| +void Assembler::vdup(const QwNeonRegister dst, const Register src, |
| + NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Instruction details available in ARM DDI 0406C.b, A8-886. |
| + int B = 0, E = 0; |
| + switch (size) { |
| + case Neon8: |
| + B = 1; |
| + break; |
| + case Neon16: |
| + E = 1; |
| + break; |
| + case Neon32: |
| + break; |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + |
| + emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | |
| + 0xB * B8 | d * B7 | E * B5 | B4); |
| +} |
| + |
| +void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Instruction details available in ARM DDI 0406C.b, A8-884. |
| + int index = src.code() & 1; |
| + int d_reg = src.code() / 2; |
| + int imm4 = 4 | index << 3; // esize = 32, index in bit 3. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vm, m; |
| + DwVfpRegister::from_code(d_reg).split_code(&vm, &m); |
| + |
| + emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U
bbudge
2016/12/10 21:33:03
Done.
|
| + B6 | m * B5 | vm); |
| +} |
| + |
| +// Encode NEON vcvt.src_type.dst_type instruction. |
| +static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst, |
| + const VFPType src_type, const QwNeonRegister src) { |
| + DCHECK(src_type != dst_type); |
| + DCHECK(src_type == F32 || dst_type == F32); |
| + DCHECK(src_type != F64 && dst_type != F64); |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.868. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vm, m; |
| + src.split_code(&vm, &m); |
| + |
| + int op = 0; |
| + if (src_type == F32) { |
| + op = dst_type == U32 ? 3 : 2; |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
DCHECK((dst_type == U32) || (dst_type == S32));
bbudge
2016/12/10 21:33:03
It's (subtly) implied by the existing DCHECKs above.
|
| + } else { |
| + DCHECK_EQ(F32, dst_type); |
| + op = src_type == U32 ? 1 : 0; |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto with src_type
bbudge
2016/12/10 21:33:03
Done.
|
| + } |
| + |
| + return 0x1E7u * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | |
| + B6 | m * B5 | vm; |
| +} |
| + |
| +void Assembler::vcvt_f32_s32(const QwNeonRegister dst, |
| + const QwNeonRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + DCHECK(VfpRegisterIsAvailable(dst)); |
| + DCHECK(VfpRegisterIsAvailable(src)); |
| + emit(EncodeNeonVCVT(F32, dst, S32, src)); |
| +} |
| + |
| +void Assembler::vcvt_f32_u32(const QwNeonRegister dst, |
| + const QwNeonRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + DCHECK(VfpRegisterIsAvailable(dst)); |
| + DCHECK(VfpRegisterIsAvailable(src)); |
| + emit(EncodeNeonVCVT(F32, dst, U32, src)); |
| +} |
| + |
| +void Assembler::vcvt_s32_f32(const QwNeonRegister dst, |
| + const QwNeonRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + DCHECK(VfpRegisterIsAvailable(dst)); |
| + DCHECK(VfpRegisterIsAvailable(src)); |
| + emit(EncodeNeonVCVT(S32, dst, F32, src)); |
| +} |
| + |
| +void Assembler::vcvt_u32_f32(const QwNeonRegister dst, |
| + const QwNeonRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + DCHECK(VfpRegisterIsAvailable(dst)); |
| + DCHECK(VfpRegisterIsAvailable(src)); |
| + emit(EncodeNeonVCVT(U32, dst, F32, src)); |
| +} |
| + |
| void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
| DwVfpRegister src2) { |
| + // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. |
| // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
| DCHECK(IsEnabled(NEON)); |
| int vd, d; |
| @@ -3956,6 +4040,7 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
| void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
| QwNeonRegister src2) { |
| + // Qd = veor(Qn, Qm) SIMD integer exclusive OR. |
| // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
| DCHECK(IsEnabled(NEON)); |
| int vd, d; |
| @@ -3968,6 +4053,148 @@ void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
| B4 | vm); |
| } |
| +void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vadd(Qn, Qm) SIMD floating point addition. |
| + // Instruction details available in ARM DDI 0406C.b, A8-830. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + emit(0x1E4 * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E4U
bbudge
2016/12/10 21:33:03
Done.
|
| + m * B5 | vm); |
| +} |
| + |
| +void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vadd(Qn, Qm) SIMD integer addition. |
| + // Instruction details available in ARM DDI 0406C.b, A8-828. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
| + n * B7 | B6 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vsub(Qn, Qm) SIMD floating point subtraction. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1086. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + emit(0x1E4 * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
| + B6 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vsub(Qn, Qm) SIMD integer subtraction. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1084. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
| + n * B7 | B6 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vtst(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vtst(Qn, Qm) SIMD test integer operands. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1098. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:03
Done.
|
| + n * B7 | B6 | m * B5 | B4 | vm); |
| +} |
| + |
| +void Assembler::vceq(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vceq(Qn, Qm) SIMD integer compare equal. |
| + // Instruction details available in ARM DDI 0406C.b, A8-844. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
ditto
bbudge
2016/12/10 21:33:04
Done.
|
| + n * B7 | B6 | m * B5 | B4 | vm); |
| +} |
| + |
| +void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
| + // Instruction details available in ARM DDI 0406C.b, A8-844. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int op = 1; // vbsl |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
Is that necessary? Why not use B20 directly below?
bbudge
2016/12/10 21:33:04
Done.
|
| + emit(0x1E6 * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E6U
bbudge
2016/12/10 21:33:03
Done.
|
| + n * B7 | B6 | m * B5 | B4 | vm); |
| +} |
| + |
| +void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list, |
| + const DwVfpRegister index) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1094. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + list.base().split_code(&vn, &n); |
| + int vm, m; |
| + index.split_code(&vm, &m); |
| + int op = 1; // vbsl |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
unused.
bbudge
2016/12/10 21:33:04
Done.
|
| + emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U
bbudge
2016/12/10 21:33:03
Done.
|
| + list.len() * B8 | n * B7 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list, |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
vtbl and vtbx encodings differ only by the value of bit 6 (B6 is set for vtbx and clear for vtbl).
bbudge
2016/12/10 21:33:03
Yes, Done.
|
| + const DwVfpRegister index) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1094. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + list.base().split_code(&vn, &n); |
| + int vm, m; |
| + index.split_code(&vm, &m); |
| + int op = 1; // vbsl |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
unused
bbudge
2016/12/10 21:33:03
Done.
|
| + emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
|
Rodolph Perfetta (ARM)
2016/12/08 18:08:27
0x1E7U
bbudge
2016/12/10 21:33:04
Done.
|
| + list.len() * B8 | n * B7 | B6 | m * B5 | vm); |
| +} |
| + |
| // Pseudo instructions. |
| void Assembler::nop(int type) { |
| // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |