| Index: src/arm/assembler-arm.cc
|
| diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
|
| index 6e0b2db281f045c72c95561f3d3d479cd46a79aa..aa6be2110b28bb7621da4e24ef37c555a1900edb 100644
|
| --- a/src/arm/assembler-arm.cc
|
| +++ b/src/arm/assembler-arm.cc
|
| @@ -483,30 +483,6 @@ void NeonMemOperand::SetAlignment(int align) {
|
| }
|
| }
|
|
|
| -
|
| -NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) {
|
| - base_ = base;
|
| - switch (registers_count) {
|
| - case 1:
|
| - type_ = nlt_1;
|
| - break;
|
| - case 2:
|
| - type_ = nlt_2;
|
| - break;
|
| - case 3:
|
| - type_ = nlt_3;
|
| - break;
|
| - case 4:
|
| - type_ = nlt_4;
|
| - break;
|
| - default:
|
| - UNREACHABLE();
|
| - type_ = nlt_1;
|
| - break;
|
| - }
|
| -}
|
| -
|
| -
|
| // -----------------------------------------------------------------------------
|
| // Specific instructions, constants, and masks.
|
|
|
| @@ -2968,7 +2944,6 @@ void Assembler::vmov(const Register dst,
|
| emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4);
|
| }
|
|
|
| -
|
| // Type of data to read from or write to VFP register.
|
| // Used as specifier in generic vcvt instruction.
|
| enum VFPType { S32, U32, F32, F64 };
|
| @@ -3902,6 +3877,57 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
|
| (dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm);
|
| }
|
|
|
| +static int EncodeScalar(NeonDataType dt, int index) {  // Encodes lane size and lane index for the scalar vmov forms.
|
| + int opc1_opc2 = 0;  // Combined selector; split into two 2-bit fields at the return.
|
| + DCHECK_LE(0, index);  // Lane index must be non-negative.
|
| + switch (dt) {
|
| + case NeonS8:
|
| + case NeonU8:
|
| + DCHECK_GT(8, index);  // 8-bit lanes: index in [0, 7].
|
| + opc1_opc2 = 0x8 | index;  // Selector pattern 1iii.
|
| + break;
|
| + case NeonS16:
|
| + case NeonU16:
|
| + DCHECK_GT(4, index);  // 16-bit lanes: index in [0, 3].
|
| + opc1_opc2 = 0x1 | (index << 1);  // Selector pattern 0ii1.
|
| + break;
|
| + case NeonS32:
|
| + case NeonU32:
|
| + DCHECK_GT(2, index);  // 32-bit lanes: index in [0, 1].
|
| + opc1_opc2 = index << 2;  // Selector pattern 0i00.
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| + }
|
| + return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;  // Upper 2 bits scaled by B21, lower 2 by B5.
|
| +}
|
| +
|
| +void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
|
| + Register src) {
|
| + // Instruction details available in ARM DDI 0406C.b, A8.8.940.
|
| + // vmov ARM core register to scalar: src goes into lane |index| of dst.
|
| + DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));  // Only the 32-bit types are accepted without NEON.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int opc1_opc2 = EncodeScalar(dt, index);  // Lane size/index bits, already positioned.
|
| + emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
|
| + opc1_opc2);
|
| +}
|
| +
|
| +void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
|
| + int index) {
|
| + // Instruction details available in ARM DDI 0406C.b, A8.8.942.
|
| + // vmov scalar to ARM core register: lane |index| of src goes into dst.
|
| + DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));  // Only the 32-bit types are accepted without NEON.
|
| + int vn, n;
|
| + src.split_code(&vn, &n);
|
| + int opc1_opc2 = EncodeScalar(dt, index);  // Lane size/index bits, already positioned.
|
| + int u = (dt & NeonDataTypeUMask) != 0 ? 1 : 0;  // NOTE(review): presumably 1 for NeonU32 too - confirm the 32-bit form permits U = 1.
|
| + emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
|
| + n * B7 | B4 | opc1_opc2);
|
| +}
|
| +
|
| void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
|
| DCHECK(IsEnabled(NEON));
|
| // Instruction details available in ARM DDI 0406C.b, A8-938.
|
| @@ -3915,6 +3941,18 @@ void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
|
| B6 | m * B5 | B4 | vm);
|
| }
|
|
|
| +void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vmvn(Qm) SIMD bitwise NOT. Details: ARM DDI 0406C.b, A8-966.
|
| + DCHECK(VfpRegisterIsAvailable(dst));
|
| + DCHECK(VfpRegisterIsAvailable(src));
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);  // dst supplies the (vd, d) fields.
|
| + int vm, m;
|
| + src.split_code(&vm, &m);  // src supplies the (vm, m) fields.
|
| + emit(0x1E7U * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm);
|
| +}
|
| +
|
| void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
|
| // Instruction details available in ARM DDI 0406C.b, A8.8.418.
|
| // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
|
| @@ -3940,8 +3978,105 @@ void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
|
| vm);
|
| }
|
|
|
| +void Assembler::vdup(NeonSize size, const QwNeonRegister dst,
|
| + const Register src) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vdup(Rt), element width given by |size|. ARM DDI 0406C.b, A8-886.
|
| + int B = 0, E = 0;  // Size flags; both stay clear for Neon32.
|
| + switch (size) {
|
| + case Neon8:
|
| + B = 1;  // Neon8: B set, E clear.
|
| + break;
|
| + case Neon16:
|
| + E = 1;  // Neon16: E set, B clear.
|
| + break;
|
| + case Neon32:
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| + }
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| +
|
| + emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
|
| + 0xB * B8 | d * B7 | E * B5 | B4);
|
| +}
|
| +
|
| +void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vdup(Dm[index]) with 32-bit elements. ARM DDI 0406C.b, A8-884.
|
| + int index = src.code() & 1;  // Low bit of the S register code picks the lane.
|
| + int d_reg = src.code() / 2;  // D register code derived from the S register code.
|
| + int imm4 = 4 | index << 3; // esize = 32, index in bit 3.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vm, m;
|
| + DwVfpRegister::from_code(d_reg).split_code(&vm, &m);  // Source operand is encoded as that D register.
|
| +
|
| + emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 |
|
| + B6 | m * B5 | vm);
|
| +}
|
| +
|
| +// Encode NEON vcvt.dst_type.src_type instruction (converts src_type to dst_type).
|
| +static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst,
|
| + const VFPType src_type, const QwNeonRegister src) {
|
| + DCHECK(src_type != dst_type);
|
| + DCHECK(src_type == F32 || dst_type == F32);  // Exactly one side is F32; the other is S32 or U32.
|
| + // Instruction details available in ARM DDI 0406C.b, A8.8.868.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vm, m;
|
| + src.split_code(&vm, &m);
|
| +
|
| + int op = 0;
|
| + if (src_type == F32) {
|
| + DCHECK(dst_type == S32 || dst_type == U32);
|
| + op = dst_type == U32 ? 3 : 2;  // F32 source: 3 for a U32 result, 2 for S32.
|
| + } else {
|
| + DCHECK(src_type == S32 || src_type == U32);
|
| + op = src_type == U32 ? 1 : 0;  // F32 result: 1 for a U32 source, 0 for S32.
|
| + }
|
| +
|
| + return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
|
| + B6 | m * B5 | vm;
|
| +}
|
| +
|
| +void Assembler::vcvt_f32_s32(const QwNeonRegister dst,
|
| + const QwNeonRegister src) {
|
| + DCHECK(IsEnabled(NEON));
|
| + DCHECK(VfpRegisterIsAvailable(dst));
|
| + DCHECK(VfpRegisterIsAvailable(src));
|
| + emit(EncodeNeonVCVT(F32, dst, S32, src));  // dst_type = F32, src_type = S32.
|
| +}
|
| +
|
| +void Assembler::vcvt_f32_u32(const QwNeonRegister dst,
|
| + const QwNeonRegister src) {
|
| + DCHECK(IsEnabled(NEON));
|
| + DCHECK(VfpRegisterIsAvailable(dst));
|
| + DCHECK(VfpRegisterIsAvailable(src));
|
| + emit(EncodeNeonVCVT(F32, dst, U32, src));  // dst_type = F32, src_type = U32.
|
| +}
|
| +
|
| +void Assembler::vcvt_s32_f32(const QwNeonRegister dst,
|
| + const QwNeonRegister src) {
|
| + DCHECK(IsEnabled(NEON));
|
| + DCHECK(VfpRegisterIsAvailable(dst));
|
| + DCHECK(VfpRegisterIsAvailable(src));
|
| + emit(EncodeNeonVCVT(S32, dst, F32, src));  // dst_type = S32, src_type = F32.
|
| +}
|
| +
|
| +void Assembler::vcvt_u32_f32(const QwNeonRegister dst,
|
| + const QwNeonRegister src) {
|
| + DCHECK(IsEnabled(NEON));
|
| + DCHECK(VfpRegisterIsAvailable(dst));
|
| + DCHECK(VfpRegisterIsAvailable(src));
|
| + emit(EncodeNeonVCVT(U32, dst, F32, src));  // dst_type = U32, src_type = F32.
|
| +}
|
| +
|
| void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
|
| DwVfpRegister src2) {
|
| + // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.
|
| // Instruction details available in ARM DDI 0406C.b, A8.8.888.
|
| DCHECK(IsEnabled(NEON));
|
| int vd, d;
|
| @@ -3956,6 +4091,7 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
|
|
|
| void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
|
| QwNeonRegister src2) {
|
| + // Qd = veor(Qn, Qm) SIMD integer exclusive OR.
|
| // Instruction details available in ARM DDI 0406C.b, A8.8.888.
|
| DCHECK(IsEnabled(NEON));
|
| int vd, d;
|
| @@ -3968,6 +4104,146 @@ void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
|
| m * B5 | B4 | vm);
|
| }
|
|
|
| +void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1,
|
| + const QwNeonRegister src2) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vadd(Qn, Qm) SIMD floating point addition (no element-size operand).
|
| + // Instruction details available in ARM DDI 0406C.b, A8-830.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);  // dst supplies the (vd, d) fields.
|
| + int vn, n;
|
| + src1.split_code(&vn, &n);  // src1 supplies the (vn, n) fields.
|
| + int vm, m;
|
| + src2.split_code(&vm, &m);  // src2 supplies the (vm, m) fields.
|
| + emit(0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 |
|
| + m * B5 | vm);
|
| +}
|
| +
|
| +void Assembler::vadd(NeonSize size, QwNeonRegister dst,
|
| + const QwNeonRegister src1, const QwNeonRegister src2) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vadd(Qn, Qm) SIMD integer addition; |size| selects the element width.
|
| + // Instruction details available in ARM DDI 0406C.b, A8-828.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vn, n;
|
| + src1.split_code(&vn, &n);
|
| + int vm, m;
|
| + src2.split_code(&vm, &m);
|
| + int sz = static_cast<int>(size);  // NeonSize value is used directly as the size field (scaled by B20).
|
| + emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
| + n * B7 | B6 | m * B5 | vm);
|
| +}
|
| +
|
| +void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1,
|
| + const QwNeonRegister src2) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
|
| + // Instruction details available in ARM DDI 0406C.b, A8-1086.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vn, n;
|
| + src1.split_code(&vn, &n);
|
| + int vm, m;
|
| + src2.split_code(&vm, &m);  // Encoding below equals the floating point vadd plus B21.
|
| + emit(0x1E4U * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
|
| + B6 | m * B5 | vm);
|
| +}
|
| +
|
| +void Assembler::vsub(NeonSize size, QwNeonRegister dst,
|
| + const QwNeonRegister src1, const QwNeonRegister src2) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vsub(Qn, Qm) SIMD integer subtraction; |size| selects the element width.
|
| + // Instruction details available in ARM DDI 0406C.b, A8-1084.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vn, n;
|
| + src1.split_code(&vn, &n);
|
| + int vm, m;
|
| + src2.split_code(&vm, &m);
|
| + int sz = static_cast<int>(size);  // Size field; the leading 0x1E6U (vs 0x1E4U) is the only difference from integer vadd.
|
| + emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
| + n * B7 | B6 | m * B5 | vm);
|
| +}
|
| +
|
| +void Assembler::vtst(NeonSize size, QwNeonRegister dst,
|
| + const QwNeonRegister src1, const QwNeonRegister src2) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vtst(Qn, Qm) SIMD test integer operands; |size| selects the element width.
|
| + // Instruction details available in ARM DDI 0406C.b, A8-1098.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vn, n;
|
| + src1.split_code(&vn, &n);
|
| + int vm, m;
|
| + src2.split_code(&vm, &m);
|
| + int sz = static_cast<int>(size);  // Size field; the encoding below equals integer vadd plus B4.
|
| + emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
| + n * B7 | B6 | m * B5 | B4 | vm);
|
| +}
|
| +
|
| +void Assembler::vceq(NeonSize size, QwNeonRegister dst,
|
| + const QwNeonRegister src1, const QwNeonRegister src2) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vceq(Qn, Qm) SIMD integer compare equal; |size| selects the element width.
|
| + // Instruction details available in ARM DDI 0406C.b, A8-844.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vn, n;
|
| + src1.split_code(&vn, &n);
|
| + int vm, m;
|
| + src2.split_code(&vm, &m);
|
| + int sz = static_cast<int>(size);  // Size field; the encoding below equals integer vsub plus B4.
|
| + emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
| + n * B7 | B6 | m * B5 | B4 | vm);
|
| +}
|
| +
|
| +void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
|
| + const QwNeonRegister src2) {
|
| + DCHECK(IsEnabled(NEON));
|
| + // Qd = vbsl(Qn, Qm) SIMD bitwise select.
|
| + // Instruction details available in ARM DDI 0406C.b, A8-844. NOTE(review): same page as cited for vceq - confirm.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vn, n;
|
| + src1.split_code(&vn, &n);
|
| + int vm, m;
|
| + src2.split_code(&vm, &m);
|
| + int op = 1; // vbsl; scaled by B20 in the encoding below.
|
| + emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 |
|
| + n * B7 | B6 | m * B5 | B4 | vm);
|
| +}
|
| +
|
| +// Encode NEON vtbl / vtbx instruction.
|
| +static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list,
|
| + const DwVfpRegister index, bool vtbx) {
|
| + // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
|
| + // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
|
| + // Both forms share one encoding here and differ only in the op bit.
|
| + // Instruction details available in ARM DDI 0406C.b, A8-1094.
|
| + int vd, d;
|
| + dst.split_code(&vd, &d);
|
| + int vn, n;
|
| + list.base().split_code(&vn, &n);  // The list's base register supplies (vn, n).
|
| + int vm, m;
|
| + index.split_code(&vm, &m);  // The index register supplies (vm, m).
|
| + int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1.
|
| + return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
|
| + list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;  // list.length() is scaled by B8, op by B6.
|
| +}
|
| +
|
| +void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list,
|
| + const DwVfpRegister index) {
|
| + DCHECK(IsEnabled(NEON));
|
| + emit(EncodeNeonVTB(dst, list, index, false));  // vtbx = false: encode vtbl.
|
| +}
|
| +
|
| +void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list,
|
| + const DwVfpRegister index) {
|
| + DCHECK(IsEnabled(NEON));
|
| + emit(EncodeNeonVTB(dst, list, index, true));  // vtbx = true: encode vtbx.
|
| +}
|
| +
|
| // Pseudo instructions.
|
| void Assembler::nop(int type) {
|
| // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
|
|
|