Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(523)

Unified Diff: src/arm/assembler-arm.cc

Issue 2546933002: [Turbofan] Add ARM NEON instructions for implementing SIMD. (Closed)
Patch Set: Fourth review comments. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/constants-arm.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm/assembler-arm.cc
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index 6e0b2db281f045c72c95561f3d3d479cd46a79aa..aa6be2110b28bb7621da4e24ef37c555a1900edb 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -483,30 +483,6 @@ void NeonMemOperand::SetAlignment(int align) {
}
}
-
-NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) {
- base_ = base;
- switch (registers_count) {
- case 1:
- type_ = nlt_1;
- break;
- case 2:
- type_ = nlt_2;
- break;
- case 3:
- type_ = nlt_3;
- break;
- case 4:
- type_ = nlt_4;
- break;
- default:
- UNREACHABLE();
- type_ = nlt_1;
- break;
- }
-}
-
-
// -----------------------------------------------------------------------------
// Specific instructions, constants, and masks.
@@ -2968,7 +2944,6 @@ void Assembler::vmov(const Register dst,
emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4);
}
-
// Type of data to read from or write to VFP register.
// Used as specifier in generic vcvt instruction.
enum VFPType { S32, U32, F32, F64 };
@@ -3902,6 +3877,57 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
(dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm);
}
+static int EncodeScalar(NeonDataType dt, int index) {  // Returns the opc1/opc2 bits selecting lane |index| for element type |dt|, already scaled into instruction position.
+ int opc1_opc2 = 0;  // 4-bit value: element-size marker bits combined with the lane index.
+ DCHECK_LE(0, index);
+ switch (dt) {
+ case NeonS8:
+ case NeonU8:
+ DCHECK_GT(8, index);  // 8-bit elements: index must be in [0, 8).
+ opc1_opc2 = 0x8 | index;
+ break;
+ case NeonS16:
+ case NeonU16:
+ DCHECK_GT(4, index);  // 16-bit elements: index must be in [0, 4).
+ opc1_opc2 = 0x1 | (index << 1);
+ break;
+ case NeonS32:
+ case NeonU32:
+ DCHECK_GT(2, index);  // 32-bit elements: index must be in [0, 2).
+ opc1_opc2 = index << 2;
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+ return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;  // Upper two bits are scaled by B21 (opc1), lower two bits by B5 (opc2).
+}
+
+void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
+ Register src) {
+ // Instruction details available in ARM DDI 0406C.b, A8.8.940.
+ // vmov ARM core register to scalar.
+ DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));  // 32-bit element types are accepted without NEON; every other type requires it.
+ int vd, d;
+ dst.split_code(&vd, &d);  // vd and d are placed at B16 and B7 in the emitted word below.
+ int opc1_opc2 = EncodeScalar(dt, index);  // Lane-selection bits, already in instruction position.
+ emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |  // No cond parameter: the top byte is fixed at 0xEE.
+ opc1_opc2);
+}
+
+void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
+ int index) {
+ // Instruction details available in ARM DDI 0406C.b, A8.8.942.
+ // vmov scalar to ARM core register.
+ DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));  // 32-bit element types are accepted without NEON; every other type requires it.
+ int vn, n;
+ src.split_code(&vn, &n);  // vn and n are placed at B16 and B7 in the emitted word below.
+ int opc1_opc2 = EncodeScalar(dt, index);  // Lane-selection bits, already in instruction position.
+ int u = (dt & NeonDataTypeUMask) != 0 ? 1 : 0;  // NOTE(review): u depends only on dt, so it may be 1 for NeonU32; confirm that U=1 is a valid encoding for 32-bit scalar moves (A8.8.942).
+ emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |  // Differs from the core-to-scalar form by the u bit at B23 and the fixed B20.
+ n * B7 | B4 | opc1_opc2);
+}
+
void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
DCHECK(IsEnabled(NEON));
// Instruction details available in ARM DDI 0406C.b, A8-938.
@@ -3915,6 +3941,18 @@ void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
B6 | m * B5 | B4 | vm);
}
+void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) {  // Emits the Q-register vmvn instruction with destination dst and source src.
+ DCHECK(IsEnabled(NEON));
+ // Instruction details available in ARM DDI 0406C.b, A8-966.
+ DCHECK(VfpRegisterIsAvailable(dst));
+ DCHECK(VfpRegisterIsAvailable(src));
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vm, m;
+ src.split_code(&vm, &m);  // Source fields, placed at bit 0 (vm) and B5 (m).
+ emit(0x1E7U * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm);
+}
+
void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
// Instruction details available in ARM DDI 0406C.b, A8.8.418.
// 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
@@ -3940,8 +3978,105 @@ void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
vm);
}
+void Assembler::vdup(NeonSize size, const QwNeonRegister dst,
+ const Register src) {
+ DCHECK(IsEnabled(NEON));
+ // Instruction details available in ARM DDI 0406C.b, A8-886.
+ int B = 0, E = 0;  // Element-size selector bits: B=1 for Neon8, E=1 for Neon16, both 0 for Neon32.
+ switch (size) {
+ case Neon8:
+ B = 1;
+ break;
+ case Neon16:
+ E = 1;
+ break;
+ case Neon32:
+ break;  // Both selector bits stay 0.
+ default:
+ UNREACHABLE();
+ break;
+ }
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B16 (vd) and B7 (d).
+
+ emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |  // B21 is set unconditionally - presumably the Q (quadword) bit; confirm against A8-886.
+ 0xB * B8 | d * B7 | E * B5 | B4);
+}
+
+void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) {  // Emits a 32-bit vdup whose scalar operand is the D-register lane computed from S register src.
+ DCHECK(IsEnabled(NEON));
+ // Instruction details available in ARM DDI 0406C.b, A8-884.
+ int index = src.code() & 1;  // Low bit of the S register code selects the lane within the D register.
+ int d_reg = src.code() / 2;  // Remaining bits of the S register code give the D register code.
+ int imm4 = 4 | index << 3; // esize = 32, index in bit 3.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vm, m;
+ DwVfpRegister::from_code(d_reg).split_code(&vm, &m);  // Source D register fields, placed at bit 0 (vm) and B5 (m).
+
+ emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 |
+ B6 | m * B5 | vm);
+}
+
+// Encode NEON vcvt.dst_type.src_type instruction (Q registers; one of the two types must be F32).
+static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst,
+ const VFPType src_type, const QwNeonRegister src) {
+ DCHECK(src_type != dst_type);
+ DCHECK(src_type == F32 || dst_type == F32);
+ // Instruction details available in ARM DDI 0406C.b, A8.8.868.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vm, m;
+ src.split_code(&vm, &m);  // Source fields, placed at bit 0 (vm) and B5 (m).
+
+ int op = 0;  // 0: S32 -> F32, 1: U32 -> F32, 2: F32 -> S32, 3: F32 -> U32.
+ if (src_type == F32) {
+ DCHECK(dst_type == S32 || dst_type == U32);
+ op = dst_type == U32 ? 3 : 2;  // Float to integer.
+ } else {
+ DCHECK(src_type == S32 || src_type == U32);
+ op = src_type == U32 ? 1 : 0;  // Integer to float.
+ }
+
+ return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |  // op occupies the two bits starting at B7.
+ B6 | m * B5 | vm;
+}
+
+void Assembler::vcvt_f32_s32(const QwNeonRegister dst,
+ const QwNeonRegister src) {
+ DCHECK(IsEnabled(NEON));
+ DCHECK(VfpRegisterIsAvailable(dst));
+ DCHECK(VfpRegisterIsAvailable(src));
+ emit(EncodeNeonVCVT(F32, dst, S32, src));  // Destination type F32, source type S32.
+}
+
+void Assembler::vcvt_f32_u32(const QwNeonRegister dst,
+ const QwNeonRegister src) {
+ DCHECK(IsEnabled(NEON));
+ DCHECK(VfpRegisterIsAvailable(dst));
+ DCHECK(VfpRegisterIsAvailable(src));
+ emit(EncodeNeonVCVT(F32, dst, U32, src));  // Destination type F32, source type U32.
+}
+
+void Assembler::vcvt_s32_f32(const QwNeonRegister dst,
+ const QwNeonRegister src) {
+ DCHECK(IsEnabled(NEON));
+ DCHECK(VfpRegisterIsAvailable(dst));
+ DCHECK(VfpRegisterIsAvailable(src));
+ emit(EncodeNeonVCVT(S32, dst, F32, src));  // Destination type S32, source type F32.
+}
+
+void Assembler::vcvt_u32_f32(const QwNeonRegister dst,
+ const QwNeonRegister src) {
+ DCHECK(IsEnabled(NEON));
+ DCHECK(VfpRegisterIsAvailable(dst));
+ DCHECK(VfpRegisterIsAvailable(src));
+ emit(EncodeNeonVCVT(U32, dst, F32, src));  // Destination type U32, source type F32.
+}
+
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
DwVfpRegister src2) {
+ // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.
// Instruction details available in ARM DDI 0406C.b, A8.8.888.
DCHECK(IsEnabled(NEON));
int vd, d;
@@ -3956,6 +4091,7 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
+ // Qd = veor(Qn, Qm) SIMD integer exclusive OR.
// Instruction details available in ARM DDI 0406C.b, A8.8.888.
DCHECK(IsEnabled(NEON));
int vd, d;
@@ -3968,6 +4104,146 @@ void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
m * B5 | B4 | vm);
}
+void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1,
+ const QwNeonRegister src2) {
+ DCHECK(IsEnabled(NEON));
+ // Qd = vadd(Qn, Qm) SIMD floating point addition.
+ // Instruction details available in ARM DDI 0406C.b, A8-830.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ src1.split_code(&vn, &n);  // First source fields, placed at B16 (vn) and B7 (n).
+ int vm, m;
+ src2.split_code(&vm, &m);  // Second source fields, placed at bit 0 (vm) and B5 (m).
+ emit(0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 |
+ m * B5 | vm);
+}
+
+void Assembler::vadd(NeonSize size, QwNeonRegister dst,
+ const QwNeonRegister src1, const QwNeonRegister src2) {
+ DCHECK(IsEnabled(NEON));
+ // Qd = vadd(Qn, Qm) SIMD integer addition.
+ // Instruction details available in ARM DDI 0406C.b, A8-828.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ src1.split_code(&vn, &n);  // First source fields, placed at B16 (vn) and B7 (n).
+ int vm, m;
+ src2.split_code(&vm, &m);  // Second source fields, placed at bit 0 (vm) and B5 (m).
+ int sz = static_cast<int>(size);  // The NeonSize enumerator value is used directly as the size field at B20.
+ emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
+ n * B7 | B6 | m * B5 | vm);
+}
+
+void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1,
+ const QwNeonRegister src2) {
+ DCHECK(IsEnabled(NEON));
+ // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
+ // Instruction details available in ARM DDI 0406C.b, A8-1086.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ src1.split_code(&vn, &n);  // First source fields, placed at B16 (vn) and B7 (n).
+ int vm, m;
+ src2.split_code(&vm, &m);  // Second source fields, placed at bit 0 (vm) and B5 (m).
+ emit(0x1E4U * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |  // Same word as the floating point vadd except that B21 is set.
+ B6 | m * B5 | vm);
+}
+
+void Assembler::vsub(NeonSize size, QwNeonRegister dst,
+ const QwNeonRegister src1, const QwNeonRegister src2) {
+ DCHECK(IsEnabled(NEON));
+ // Qd = vsub(Qn, Qm) SIMD integer subtraction.
+ // Instruction details available in ARM DDI 0406C.b, A8-1084.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ src1.split_code(&vn, &n);  // First source fields, placed at B16 (vn) and B7 (n).
+ int vm, m;
+ src2.split_code(&vm, &m);  // Second source fields, placed at bit 0 (vm) and B5 (m).
+ int sz = static_cast<int>(size);  // The NeonSize enumerator value is used directly as the size field at B20.
+ emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |  // Same word as the integer vadd except for the 0x1E6U prefix (vadd uses 0x1E4U).
+ n * B7 | B6 | m * B5 | vm);
+}
+
+void Assembler::vtst(NeonSize size, QwNeonRegister dst,
+ const QwNeonRegister src1, const QwNeonRegister src2) {
+ DCHECK(IsEnabled(NEON));
+ // Qd = vtst(Qn, Qm) SIMD test integer operands.
+ // Instruction details available in ARM DDI 0406C.b, A8-1098.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ src1.split_code(&vn, &n);  // First source fields, placed at B16 (vn) and B7 (n).
+ int vm, m;
+ src2.split_code(&vm, &m);  // Second source fields, placed at bit 0 (vm) and B5 (m).
+ int sz = static_cast<int>(size);  // The NeonSize enumerator value is used directly as the size field at B20.
+ emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |  // Same word as the integer vadd with B4 additionally set.
+ n * B7 | B6 | m * B5 | B4 | vm);
+}
+
+void Assembler::vceq(NeonSize size, QwNeonRegister dst,
+ const QwNeonRegister src1, const QwNeonRegister src2) {
+ DCHECK(IsEnabled(NEON));
+ // Qd = vceq(Qn, Qm) SIMD integer compare equal.
+ // Instruction details available in ARM DDI 0406C.b, A8-844.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ src1.split_code(&vn, &n);  // First source fields, placed at B16 (vn) and B7 (n).
+ int vm, m;
+ src2.split_code(&vm, &m);  // Second source fields, placed at bit 0 (vm) and B5 (m).
+ int sz = static_cast<int>(size);  // The NeonSize enumerator value is used directly as the size field at B20.
+ emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |  // Same word as the integer vsub with B4 additionally set.
+ n * B7 | B6 | m * B5 | B4 | vm);
+}
+
+void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
+ const QwNeonRegister src2) {
+ DCHECK(IsEnabled(NEON));
+ // Qd = vbsl(Qn, Qm) SIMD bitwise select.
+ // Instruction details available in ARM DDI 0406C.b, A8-844.  NOTE(review): same page as cited for vceq; confirm the vbsl page reference.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ src1.split_code(&vn, &n);  // First source fields, placed at B16 (vn) and B7 (n).
+ int vm, m;
+ src2.split_code(&vm, &m);  // Second source fields, placed at bit 0 (vm) and B5 (m).
+ int op = 1; // vbsl
+ emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 |  // op sits at B20, where the sized integer ops place their size field.
+ n * B7 | B6 | m * B5 | B4 | vm);
+}
+
+// Encode NEON vtbl / vtbx instruction; vtbx selects which of the two is produced.
+static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list,
+ const DwVfpRegister index, bool vtbx) {
+ // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
+ // Instruction details available in ARM DDI 0406C.b, A8-1094.
+ // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
+ // Instruction details available in ARM DDI 0406C.b, A8-1094.
+ int vd, d;
+ dst.split_code(&vd, &d);  // Destination fields, placed at B12 (vd) and B22 (d).
+ int vn, n;
+ list.base().split_code(&vn, &n);  // Only the list's base register is encoded, at B16 (vn) and B7 (n).
+ int vm, m;
+ index.split_code(&vm, &m);  // Index register fields, placed at bit 0 (vm) and B5 (m).
+ int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1.
+ return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
+ list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;  // list.length() is used directly as the field at B8; its value range is defined by NeonListOperand (not visible here).
+}
+
+void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list,
+ const DwVfpRegister index) {
+ DCHECK(IsEnabled(NEON));
+ emit(EncodeNeonVTB(dst, list, index, false));  // false selects the vtbl form (op = 0).
+}
+
+void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list,
+ const DwVfpRegister index) {
+ DCHECK(IsEnabled(NEON));
+ emit(EncodeNeonVTB(dst, list, index, true));  // true selects the vtbx form (op = 1).
+}
+
// Pseudo instructions.
void Assembler::nop(int type) {
// ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/constants-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698