Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(520)

Unified Diff: src/arm/simulator-arm.cc

Issue 2546933002: [Turbofan] Add ARM NEON instructions for implementing SIMD. (Closed)
Patch Set: Fourth review comments. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm/simulator-arm.h ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm/simulator-arm.cc
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
index 5b0c7d603eec3f47d852eda7503a38c1b7ee8f20..12654834f2877b53738ea63170819206375050ad 100644
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -3067,6 +3067,7 @@ void Simulator::DecodeType7(Instruction* instr) {
// Dd = vsqrt(Dm)
// Sd = vsqrt(Sm)
// vmrs
+// vdup.size Qd, Rt.
void Simulator::DecodeTypeVFP(Instruction* instr) {
DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) );
DCHECK(instr->Bits(11, 9) == 0x5);
@@ -3277,24 +3278,116 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
if ((instr->VCValue() == 0x0) &&
(instr->VAValue() == 0x0)) {
DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
- } else if ((instr->VLValue() == 0x0) &&
- (instr->VCValue() == 0x1) &&
- (instr->Bit(23) == 0x0)) {
- // vmov (ARM core register to scalar)
- int vd = instr->Bits(19, 16) | (instr->Bit(7) << 4);
- uint32_t data[2];
- get_d_register(vd, data);
- data[instr->Bit(21)] = get_register(instr->RtValue());
- set_d_register(vd, data);
- } else if ((instr->VLValue() == 0x1) &&
- (instr->VCValue() == 0x1) &&
- (instr->Bit(23) == 0x0)) {
+ } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1)) {
+ if (instr->Bit(23) == 0) {
+ // vmov (ARM core register to scalar)
+ int vd = instr->VFPNRegValue(kDoublePrecision);
+ int rt = instr->RtValue();
+ int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
+ if ((opc1_opc2 & 0xb) == 0) {
+ // NeonS32/NeonU32
+ uint32_t data[2];
+ get_d_register(vd, data);
+ data[instr->Bit(21)] = get_register(rt);
+ set_d_register(vd, data);
+ } else {
+ uint64_t data;
+ get_d_register(vd, &data);
+ uint64_t rt_value = get_register(rt);
+ if ((opc1_opc2 & 0x8) != 0) {
+ // NeonS8 / NeonU8
+ int i = opc1_opc2 & 0x7;
+ int shift = i * kBitsPerByte;
+ const uint64_t mask = 0xFF;
+ data &= ~(mask << shift);
+ data |= (rt_value & mask) << shift;
+ set_d_register(vd, &data);
+ } else if ((opc1_opc2 & 0x1) != 0) {
+ // NeonS16 / NeonU16
+ int i = (opc1_opc2 >> 1) & 0x3;
+ int shift = i * kBitsPerByte * kShortSize;
+ const uint64_t mask = 0xFFFF;
+ data &= ~(mask << shift);
+ data |= (rt_value & mask) << shift;
+ set_d_register(vd, &data);
+ } else {
+ UNREACHABLE(); // Not used by V8.
+ }
+ }
+ } else {
+ // vdup.size Qd, Rt.
+ NeonSize size = Neon32;
+ if (instr->Bit(5) != 0)
+ size = Neon16;
+ else if (instr->Bit(22) != 0)
+ size = Neon8;
+ int vd = instr->VFPNRegValue(kSimd128Precision);
+ int rt = instr->RtValue();
+ uint32_t rt_value = get_register(rt);
+ uint32_t q_data[4];
+ switch (size) {
+ case Neon8: {
+ rt_value &= 0xFF;
+ uint8_t* dst = reinterpret_cast<uint8_t*>(q_data);
+ for (int i = 0; i < 16; i++) {
+ dst[i] = rt_value;
+ }
+ break;
+ }
+ case Neon16: {
+ // Perform pairwise ops instead of casting to uint16_t.
+ rt_value &= 0xFFFFu;
+ uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu);
+ for (int i = 0; i < 4; i++) {
+ q_data[i] = rt_rt;
+ }
+ break;
+ }
+ case Neon32: {
+ for (int i = 0; i < 4; i++) {
+ q_data[i] = rt_value;
+ }
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+ set_q_register(vd, q_data);
+ }
+ } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
// vmov (scalar to ARM core register)
- int vn = instr->Bits(19, 16) | (instr->Bit(7) << 4);
- double dn_value = get_double_from_d_register(vn);
- int32_t data[2];
- memcpy(data, &dn_value, 8);
- set_register(instr->RtValue(), data[instr->Bit(21)]);
+ int vn = instr->VFPNRegValue(kDoublePrecision);
+ int rt = instr->RtValue();
+ int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
+ if ((opc1_opc2 & 0xb) == 0) {
+ // NeonS32 / NeonU32
+ double dn_value = get_double_from_d_register(vn);
+ int32_t data[2];
+ memcpy(data, &dn_value, 8);
+ set_register(rt, data[instr->Bit(21)]);
+ } else {
+ uint64_t data;
+ get_d_register(vn, &data);
+ bool u = instr->Bit(23) != 0;
+ if ((opc1_opc2 & 0x8) != 0) {
+ // NeonS8 / NeonU8
+ int i = opc1_opc2 & 0x7;
+ int shift = i * kBitsPerByte;
+ uint32_t scalar = (data >> shift) & 0xFFu;
+ if (!u && (scalar & 0x80) != 0) scalar |= 0xffffff00;
+ set_register(rt, scalar);
+ } else if ((opc1_opc2 & 0x1) != 0) {
+ // NeonS16 / NeonU16
+ int i = (opc1_opc2 >> 1) & 0x3;
+ int shift = i * kBitsPerByte * kShortSize;
+ uint32_t scalar = (data >> shift) & 0xFFFFu;
+ if (!u && (scalar & 0x8000) != 0) scalar |= 0xffff0000;
+ set_register(rt, scalar);
+ } else {
+ UNREACHABLE(); // Not used by V8.
+ }
+ }
} else if ((instr->VLValue() == 0x1) &&
(instr->VCValue() == 0x0) &&
(instr->VAValue() == 0x7) &&
@@ -3520,6 +3613,48 @@ int VFPConversionSaturate(double val, bool unsigned_res) {
}
}
+int32_t Simulator::ConvertDoubleToInt(double val, bool unsigned_integer,
+ VFPRoundingMode mode) {  // Converts val to a 32-bit integer using rounding mode `mode`; also updates inv_op_vfp_flag_ and inexact_vfp_flag_.
+ int32_t result =
+ unsigned_integer ? static_cast<uint32_t>(val) : static_cast<int32_t>(val);  // Initial C++ cast; adjusted per rounding mode below.
+ // Set the invalid-operation flag from get_inv_op_vfp_flag (defined elsewhere in the simulator).
+ inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
+ // Distance between val and the cast result; in the unsigned case result is reinterpreted as uint32_t first.
+ double abs_diff = unsigned_integer
+ ? std::fabs(val - static_cast<uint32_t>(result))
+ : std::fabs(val - result);
+ // Any nonzero distance means the cast did not represent val exactly.
+ inexact_vfp_flag_ = (abs_diff != 0);
+ // Invalid operations take the saturated value; otherwise the cast result is fixed up for the rounding mode.
+ if (inv_op_vfp_flag_) {
+ result = VFPConversionSaturate(val, unsigned_integer);
+ } else {
+ switch (mode) {
+ case RN: {
+ int val_sign = (val > 0) ? 1 : -1;  // Step direction used when the cast result must move by one.
+ if (abs_diff > 0.5) {
+ result += val_sign;
+ } else if (abs_diff == 0.5) {
+ // Round to even if exactly halfway.
+ result = ((result % 2) == 0) ? result : result + val_sign;
+ }
+ break;
+ }
+ // RM: step down by one when the cast result ended up above val.
+ case RM:
+ result = result > val ? result - 1 : result;
+ break;
+ // RZ: the cast result is returned unchanged.
+ case RZ:
+ // Nothing to do.
+ break;
+ // Any other rounding mode is not handled by this helper.
+ default:
+ UNREACHABLE();
+ }
+ }
+ return result;
+}
void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7) &&
@@ -3556,44 +3691,7 @@ void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
double val = double_precision ? get_double_from_d_register(src)
: get_float_from_s_register(src);
- int temp = unsigned_integer ? static_cast<uint32_t>(val)
- : static_cast<int32_t>(val);
-
- inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
-
- double abs_diff =
- unsigned_integer ? std::fabs(val - static_cast<uint32_t>(temp))
- : std::fabs(val - temp);
-
- inexact_vfp_flag_ = (abs_diff != 0);
-
- if (inv_op_vfp_flag_) {
- temp = VFPConversionSaturate(val, unsigned_integer);
- } else {
- switch (mode) {
- case RN: {
- int val_sign = (val > 0) ? 1 : -1;
- if (abs_diff > 0.5) {
- temp += val_sign;
- } else if (abs_diff == 0.5) {
- // Round to even if exactly halfway.
- temp = ((temp % 2) == 0) ? temp : temp + val_sign;
- }
- break;
- }
-
- case RM:
- temp = temp > val ? temp - 1 : temp;
- break;
-
- case RZ:
- // Nothing to do.
- break;
-
- default:
- UNREACHABLE();
- }
- }
+ int32_t temp = ConvertDoubleToInt(val, unsigned_integer, mode);
// Update the destination register.
set_s_register_from_sinteger(dst, temp);
@@ -3740,6 +3838,16 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
}
}
+#define HIGH_16(x) ((x) >> 16)  // x shifted right by 16; used below on uint32_t lanes to get the upper half.
+#define LOW_16(x) ((x)&0xFFFFu)  // Lower 16 bits of x.
+#define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu)  // Packs two halves into one word; only `low` is masked.
+#define PAIRWISE_OP(x, y, OP) \
+ COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y))))  // Applies OP to the high halves and to the low halves separately.
+// The macros below are the 16-bit operations passed as the OP argument of PAIRWISE_OP.
+#define ADD_16(x, y) ((x) + (y))
+#define SUB_16(x, y) ((x) - (y))
+#define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0)  // 0xFFFF when the operands are equal, else 0.
+#define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0)  // 0xFFFF when the operands share any set bit, else 0.
void Simulator::DecodeSpecialCondition(Instruction* instr) {
switch (instr->SpecialValue()) {
@@ -3752,6 +3860,91 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
uint32_t data[4];
get_q_register(Vm, data);
set_q_register(Vd, data);
+ } else if (instr->Bits(11, 8) == 8) {
+ // vadd/vtst
+ int size = static_cast<NeonSize>(instr->Bits(21, 20));
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ int Vn = instr->VFPNRegValue(kSimd128Precision);
+ uint32_t src1[4], src2[4];
+ get_q_register(Vn, src1);
+ get_q_register(Vm, src2);
+ if (instr->Bit(4) == 0) {
+ // vadd.i<size> Qd, Qn, Qm.
+ switch (size) {
+ case Neon8: {
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
+ for (int i = 0; i < 16; i++) {
+ s1[i] += s2[i];
+ }
+ break;
+ }
+ case Neon16: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16);
+ }
+ break;
+ }
+ case Neon32: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] += src2[i];
+ }
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+ } else {
+ // vtst.i<size> Qd, Qn, Qm.
+ switch (size) {
+ case Neon8: {
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
+ for (int i = 0; i < 16; i++) {
+ s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0;
+ }
+ break;
+ }
+ case Neon16: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16);
+ }
+ break;
+ }
+ case Neon32: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
+ }
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+ }
+ set_q_register(Vd, src1);
+ } else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xd &&
+ instr->Bit(4) == 0) {
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ int Vn = instr->VFPNRegValue(kSimd128Precision);
+ uint32_t src1[4], src2[4];
+ get_q_register(Vn, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 4; i++) {
+ if (instr->Bit(21) == 0) {
+ // vadd.f32 Qd, Qn, Qm.
+ src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) +
+ bit_cast<float>(src2[i]));
+ } else {
+ // vsub.f32 Qd, Qn, Qm.
+ src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) -
+ bit_cast<float>(src2[i]));
+ }
+ }
+ set_q_register(Vd, src1);
} else {
UNIMPLEMENTED();
}
@@ -3781,8 +3974,92 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
break;
case 6:
- if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
- instr->Bit(4) == 1) {
+ if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) {
+ // vsub.size Qd, Qn, Qm.
+ int size = static_cast<NeonSize>(instr->Bits(21, 20));
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ int Vn = instr->VFPNRegValue(kSimd128Precision);
+ uint32_t src1[4], src2[4];
+ get_q_register(Vn, src1);
+ get_q_register(Vm, src2);
+ switch (size) {
+ case Neon8: {
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
+ for (int i = 0; i < 16; i++) {
+ s1[i] -= s2[i];
+ }
+ break;
+ }
+ case Neon16: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16);
+ }
+ break;
+ }
+ case Neon32: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] -= src2[i];
+ }
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+ set_q_register(Vd, src1);
+ } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) {
+ // vceq.size Qd, Qn, Qm.
+ int size = static_cast<NeonSize>(instr->Bits(21, 20));
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ int Vn = instr->VFPNRegValue(kSimd128Precision);
+ uint32_t src1[4], src2[4];
+ get_q_register(Vn, src1);
+ get_q_register(Vm, src2);
+ switch (size) {
+ case Neon8: {
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
+ for (int i = 0; i < 16; i++) {
+ s1[i] = s1[i] == s2[i] ? 0xFF : 0;
+ }
+ break;
+ }
+ case Neon16: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16);
+ }
+ break;
+ }
+ case Neon32: {
+ for (int i = 0; i < 4; i++) {
+ src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0;
+ }
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+ set_q_register(Vd, src1);
+ } else if (instr->Bits(21, 20) == 1 && instr->Bits(11, 8) == 1 &&
+ instr->Bit(4) == 1) {
+ // vbsl.size Qd, Qn, Qm.
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ int Vn = instr->VFPNRegValue(kSimd128Precision);
+ uint32_t dst[4], src1[4], src2[4];
+ get_q_register(Vd, dst);
+ get_q_register(Vn, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 4; i++) {
+ dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
+ }
+ set_q_register(Vd, dst);
+ } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
+ instr->Bit(4) == 1) {
if (instr->Bit(6) == 0) {
// veor Dd, Dn, Dm
int Vd = instr->VFPDRegValue(kDoublePrecision);
@@ -3829,6 +4106,40 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
e++;
}
set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
+ } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB &&
+ instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 &&
+ instr->Bit(4) == 0) {
+ // vcvt.<Td>.<Tm> Qd, Qm.
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ uint32_t q_data[4];
+ get_q_register(Vm, q_data);
+ int op = instr->Bits(8, 7);
+ for (int i = 0; i < 4; i++) {
+ switch (op) {
+ case 0:
+ // f32 <- s32, round towards nearest.
+ q_data[i] = bit_cast<uint32_t>(
+ std::round(static_cast<float>(bit_cast<int32_t>(q_data[i]))));
+ break;
+ case 1:
+ // f32 <- u32, round towards nearest.
+ q_data[i] =
+ bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i])));
+ break;
+ case 2:
+ // s32 <- f32, round to zero.
+ q_data[i] = static_cast<uint32_t>(
+ ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
+ break;
+ case 3:
+ // u32 <- f32, round to zero.
+ q_data[i] = static_cast<uint32_t>(
+ ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
+ break;
+ }
+ }
+ set_q_register(Vd, q_data);
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
(instr->Bit(4) == 0)) {
if (instr->Bit(6) == 0) {
@@ -3850,6 +4161,49 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
set_q_register(vm, dval);
set_q_register(vd, mval);
}
+ } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 &&
+ instr->Bit(4) == 0x0) {
+ // vdup.32 Qd, Dm[index].
+ int vd = instr->VFPDRegValue(kSimd128Precision);
+ int vm = instr->VFPMRegValue(kDoublePrecision);
+ int index = instr->Bit(19);
+ uint32_t s_data = get_s_register(vm * 2 + index);
+ uint32_t q_data[4];
+ for (int i = 0; i < 4; i++) q_data[i] = s_data;
+ set_q_register(vd, q_data);
+ } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 &&
+ instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) {
+ // vmvn Qd, Qm.
+ int vd = instr->VFPDRegValue(kSimd128Precision);
+ int vm = instr->VFPMRegValue(kSimd128Precision);
+ uint32_t q_data[4];
+ get_q_register(vm, q_data);
+ for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
+ set_q_register(vd, q_data);
+ } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 &&
+ instr->Bit(4) == 0x0) {
+ // vtb[l,x] Dd, <list>, Dm.
+ int vd = instr->VFPDRegValue(kDoublePrecision);
+ int vn = instr->VFPNRegValue(kDoublePrecision);
+ int vm = instr->VFPMRegValue(kDoublePrecision);
+ int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
+ bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
+ uint64_t destination = 0, indices = 0, result = 0;
+ get_d_register(vd, &destination);
+ get_d_register(vm, &indices);
+ for (int i = 0; i < kDoubleSize; i++) {
+ int shift = i * kBitsPerByte;
+ int index = (indices >> shift) & 0xFF;
+ if (index < table_len) {
+ uint64_t table;
+ get_d_register(vn + index / kDoubleSize, &table);
+ result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
+ << shift;
+ } else if (vtbx) {
+ result |= destination & (0xFFull << shift);
+ }
+ }
+ set_d_register(vd, &result);
} else {
UNIMPLEMENTED();
}
« no previous file with comments | « src/arm/simulator-arm.h ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698