Chromium Code Reviews| Index: src/arm/assembler-arm.cc |
| diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc |
| index 0a671c06244aefca6d2ca8997a8c35fd583281cd..bb03354a622119427a86e57a6b96fef4c9c88489 100644 |
| --- a/src/arm/assembler-arm.cc |
| +++ b/src/arm/assembler-arm.cc |
| @@ -483,30 +483,6 @@ void NeonMemOperand::SetAlignment(int align) { |
| } |
| } |
| - |
| -NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) { |
| - base_ = base; |
| - switch (registers_count) { |
| - case 1: |
| - type_ = nlt_1; |
| - break; |
| - case 2: |
| - type_ = nlt_2; |
| - break; |
| - case 3: |
| - type_ = nlt_3; |
| - break; |
| - case 4: |
| - type_ = nlt_4; |
| - break; |
| - default: |
| - UNREACHABLE(); |
| - type_ = nlt_1; |
| - break; |
| - } |
| -} |
| - |
| - |
| // ----------------------------------------------------------------------------- |
| // Specific instructions, constants, and masks. |
| @@ -2968,7 +2944,6 @@ void Assembler::vmov(const Register dst, |
| emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4); |
| } |
| - |
// Data type read from or written to a VFP register.
// Serves as the type specifier for the generic vcvt instruction.
enum VFPType {
  S32,
  U32,
  F32,
  F64
};
| @@ -3915,6 +3890,18 @@ void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { |
| B6 | m * B5 | B4 | vm); |
| } |
| +void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Instruction details available in ARM DDI 0406C.b, A8-966. |
| + DCHECK(VfpRegisterIsAvailable(dst)); |
| + DCHECK(VfpRegisterIsAvailable(src)); |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vm, m; |
| + src.split_code(&vm, &m); |
| + emit(0x1E7 * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm); |
| +} |
| + |
| void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { |
| // Instruction details available in ARM DDI 0406C.b, A8.8.418. |
| // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | |
| @@ -3940,8 +3927,72 @@ void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { |
| vm); |
| } |
| +void Assembler::vdup(const QwNeonRegister dst, const Register src, |
| + NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Instruction details available in ARM DDI 0406C.b, A8-886. |
| + int B = 0, E = 0; |
| + switch (size) { |
| + case Neon8: |
| + B = 1; |
| + break; |
| + case Neon16: |
| + E = 1; |
| + break; |
| + case Neon32: |
| + break; |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + |
| + emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | |
| + 0xB * B8 | d * B7 | E * B5 | B4); |
| +} |
| + |
| +void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Instruction details available in ARM DDI 0406C.b, A8-884. |
| + int index = src.code() & 1; |
| + int d_reg = src.code() / 2; |
| + int imm4 = 4 | index << 3; // esize = 32, index in bit 3. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vm, m; |
| + DwVfpRegister::from_code(d_reg).split_code(&vm, &m); |
| + |
| + emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | |
| + B6 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vcvt(const QwNeonRegister dst, const QwNeonRegister src, |
|
Rodolph Perfetta (ARM)
2016/12/07 15:59:00
other vcvt instruction indicate the conversion dir
bbudge
2016/12/08 03:07:30
Yes, that's a lot more consistent with the existin
|
| + NeonDataType from, NeonDataType to) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.868. |
| + DCHECK(IsEnabled(NEON)); |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vm, m; |
| + src.split_code(&vm, &m); |
| + |
| + int op = 0; |
| + if (from == NeonOtherDataType) { |
| + DCHECK_EQ(NeonS32, to & NeonDataTypeSizeMask); |
| + op = ((to & NeonDataTypeUMask) != 0) ? 1 : 0; |
|
Rodolph Perfetta (ARM)
2016/12/07 15:59:00
this is the other way round: when converting to in
bbudge
2016/12/08 03:07:30
Done.
|
| + } else { |
| + DCHECK_EQ(NeonOtherDataType, to); |
| + DCHECK_EQ(NeonS32, from & NeonDataTypeSizeMask); |
| + op = ((from & NeonDataTypeUMask) != 0) ? 3 : 2; |
|
Rodolph Perfetta (ARM)
2016/12/07 15:59:00
and 1 and 0 here.
bbudge
2016/12/08 03:07:30
Done.
|
| + } |
| + |
| + emit(0x1E7 * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | |
| + m * B5 | vm); |
| +} |
| + |
| void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
| DwVfpRegister src2) { |
| + // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. |
| // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
| DCHECK(IsEnabled(NEON)); |
| int vd, d; |
| @@ -3956,6 +4007,7 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
| void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
| QwNeonRegister src2) { |
| + // Qd = veor(Qn, Qm) SIMD integer exclusive OR. |
| // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
| DCHECK(IsEnabled(NEON)); |
| int vd, d; |
| @@ -3968,6 +4020,148 @@ void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
| B4 | vm); |
| } |
| +void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vadd(Qn, Qm) SIMD floating point addition. |
| + // Instruction details available in ARM DDI 0406C.b, A8-830. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + emit(0x1E4 * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 | |
| + m * B5 | vm); |
| +} |
| + |
| +void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vadd(Qn, Qm) SIMD integer addition. |
| + // Instruction details available in ARM DDI 0406C.b, A8-828. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
| + n * B7 | B6 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vsub(Qn, Qm) SIMD floating point subtraction. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1086. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + emit(0x1E4 * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | |
| + B6 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vsub(Qn, Qm) SIMD integer subtraction. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1084. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
| + n * B7 | B6 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vtst(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vtst(Qn, Qm) SIMD test integer operands. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1098. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
| + n * B7 | B6 | m * B5 | B4 | vm); |
| +} |
| + |
| +void Assembler::vceq(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2, NeonSize size) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vceq(Qn, Qm) SIMD integer compare equal. |
| + // Instruction details available in ARM DDI 0406C.b, A8-844. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int sz = static_cast<int>(size); |
| + emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
| + n * B7 | B6 | m * B5 | B4 | vm); |
| +} |
| + |
| +void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, |
| + const QwNeonRegister src2) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
| + // Instruction details available in ARM DDI 0406C.b, A8-844. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + src1.split_code(&vn, &n); |
| + int vm, m; |
| + src2.split_code(&vm, &m); |
| + int op = 1; // vbsl |
| + emit(0x1E6 * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | |
| + n * B7 | B6 | m * B5 | B4 | vm); |
| +} |
| + |
| +void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list, |
| + const DwVfpRegister index) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1094. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + list.base().split_code(&vn, &n); |
| + int vm, m; |
| + index.split_code(&vm, &m); |
| + int op = 1; // vbsl |
| + emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
| + list.len() * B8 | n * B7 | m * B5 | vm); |
| +} |
| + |
| +void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list, |
| + const DwVfpRegister index) { |
| + DCHECK(IsEnabled(NEON)); |
| + // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
| + // Instruction details available in ARM DDI 0406C.b, A8-1094. |
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vn, n; |
| + list.base().split_code(&vn, &n); |
| + int vm, m; |
| + index.split_code(&vm, &m); |
| + int op = 1; // vbsl |
| + emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
| + list.len() * B8 | n * B7 | B6 | m * B5 | vm); |
| +} |
| + |
| // Pseudo instructions. |
| void Assembler::nop(int type) { |
| // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |