Index: src/arm/simulator-arm.cc |
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
index 5b0c7d603eec3f47d852eda7503a38c1b7ee8f20..e199f576d0e595ca72be46c0c62b5a4348742b0a 100644 |
--- a/src/arm/simulator-arm.cc |
+++ b/src/arm/simulator-arm.cc |
@@ -3067,6 +3067,7 @@ void Simulator::DecodeType7(Instruction* instr) { |
// Dd = vsqrt(Dm) |
// Sd = vsqrt(Sm) |
// vmrs |
+// vdup.size Qd, Rt. |
void Simulator::DecodeTypeVFP(Instruction* instr) { |
DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) ); |
DCHECK(instr->Bits(11, 9) == 0x5); |
@@ -3286,6 +3287,44 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { |
get_d_register(vd, data); |
data[instr->Bit(21)] = get_register(instr->RtValue()); |
set_d_register(vd, data); |
+ } else if (instr->VLValue() == 0x0 && instr->VCValue() == 0x1 && |
+ instr->Bit(23) == 0x1) { |
+ // vdup.size Qd, Rt. |
+ int size = 32; |
+ if (instr->Bit(5) != 0) |
+ size = 16; |
+ else if (instr->Bit(22) != 0) |
+ size = 8; |
+ int vd = instr->VFPNRegValue(kSimd128Precision); |
+ int rt = instr->RtValue(); |
+ uint32_t rt_value = get_register(rt); |
+ uint32_t q_data[4]; |
+ switch (size) { |
+ case 8: { |
+ rt_value &= 0xFF; |
+ uint8_t* dst = reinterpret_cast<uint8_t*>(q_data); |
+ for (int i = 0; i < 16; i++) { |
+ dst[i] = rt_value; |
+ } |
+ break; |
+ } |
+ case 16: { |
+ // Perform pairwise ops instead of casting to uint16_t. |
+ rt_value &= 0xFFFFu; |
+ uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu); |
+ for (int i = 0; i < 4; i++) { |
+ q_data[i] = rt_rt; |
+ } |
+ break; |
+ } |
+ case 32: { |
+ for (int i = 0; i < 4; i++) { |
+ q_data[i] = rt_value; |
+ } |
+ break; |
+ } |
+ } |
+ set_q_register(vd, q_data); |
} else if ((instr->VLValue() == 0x1) && |
(instr->VCValue() == 0x1) && |
(instr->Bit(23) == 0x0)) { |
@@ -3740,6 +3779,16 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { |
} |
} |
// Helpers for operating on two 16-bit lanes packed into one 32-bit word.
// The simulator keeps Q register contents as uint32_t[4]; these macros split
// each word into its halves, apply OP to each half independently and pack the
// two 16-bit results back together, so no cast to uint16_t* is needed.

// Upper 16-bit lane of x, moved down to bits 15:0.
#define HIGH_16(x) ((x) >> 16)
// Lower 16-bit lane of x.
#define LOW_16(x) ((x) & 0xFFFFu)
// Packs two lane results into one word. |high| is shifted into bits 31:16, so
// any carry/borrow above bit 15 falls off the top of a 32-bit operand; |low|
// is masked explicitly. Fully parenthesized so the expansion is safe inside
// larger expressions (e.g. comparisons).
#define COMBINE_32(high, low) (((high) << 16) | ((low) & 0xFFFFu))
// Applies the binary lane operation OP to the high halves and to the low
// halves of x and y, and returns the recombined 32-bit word.
#define PAIRWISE_OP(x, y, OP) \
  COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y))))

// Lane operations usable as the OP argument of PAIRWISE_OP.
#define ADD(x, y) ((x) + (y))
#define SUB(x, y) ((x) - (y))
// Lane is all ones when the operands are equal, zero otherwise.
#define CEQ(x, y) ((x) == (y) ? 0xFFFFu : 0)
// Lane is all ones when the operands have at least one set bit in common,
// zero otherwise. This matches the 8-bit and 32-bit vtst paths, which test
// (a & b) != 0.
#define TST(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0)
Rodolph Perfetta (ARM)
2016/12/08 18:08:28
!= 0
bbudge
2016/12/10 21:33:04
Done. I need to figure out why the test didn't cat
|
void Simulator::DecodeSpecialCondition(Instruction* instr) { |
switch (instr->SpecialValue()) { |
@@ -3752,6 +3801,85 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
uint32_t data[4]; |
get_q_register(Vm, data); |
set_q_register(Vd, data); |
+ } else if (instr->Bits(11, 8) == 8) { |
+ // vadd/vtst |
+ int size = instr->Bits(21, 20); |
Rodolph Perfetta (ARM)
2016/12/08 18:08:28
NeonSize size = static_cast<NeonSize>(instr->Bit(2
bbudge
2016/12/10 21:33:04
Done. Here and 4 other switches.
|
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ if (instr->Bit(4) == 0) { |
+ // vadd.i<size> Qd, Qm, Qn. |
+ switch (size) { |
+ case 0: { |
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
+ for (int i = 0; i < 16; i++) { |
+ s1[i] += s2[i]; |
+ } |
+ break; |
+ } |
+ case 1: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD); |
+ } |
+ break; |
+ } |
+ case 2: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] += src2[i]; |
+ } |
+ break; |
+ } |
+ } |
+ } else { |
+ // vtst.i<size> Qd, Qm, Qn. |
+ switch (size) { |
+ case 0: { |
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
+ for (int i = 0; i < 16; i++) { |
+ s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0; |
+ } |
+ break; |
+ } |
+ case 1: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], TST); |
+ } |
+ break; |
+ } |
+ case 2: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
+ } |
+ break; |
+ } |
+ } |
+ } |
+ set_q_register(Vd, src1); |
+ } else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xd && |
+ instr->Bit(4) == 0) { |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ if (instr->Bit(21) == 0) { |
+ // vadd.f32 Qd, Qm, Qn. |
+ src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) + |
+ bit_cast<float>(src2[i])); |
+ } else { |
+ // vsub.f32 Qd, Qm, Qn. |
+ src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) - |
+ bit_cast<float>(src2[i])); |
+ } |
+ } |
+ set_q_register(Vd, src1); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -3781,8 +3909,86 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
} |
break; |
case 6: |
- if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && |
- instr->Bit(4) == 1) { |
+ if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { |
+ // vsub.size Qd, Qm, Qn. |
+ int size = instr->Bits(21, 20); |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ switch (size) { |
+ case 0: { |
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
+ for (int i = 0; i < 16; i++) { |
+ s1[i] -= s2[i]; |
+ } |
+ break; |
+ } |
+ case 1: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB); |
+ } |
+ break; |
+ } |
+ case 2: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] -= src2[i]; |
+ } |
+ break; |
+ } |
+ } |
+ set_q_register(Vd, src1); |
+ } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) { |
+ // vceq.size Qd, Qm, Qn. |
+ int size = instr->Bits(21, 20); |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ switch (size) { |
+ case 0: { |
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
+ for (int i = 0; i < 16; i++) { |
+ s1[i] = s1[i] == s2[i] ? 0xFF : 0; |
+ } |
+ break; |
+ } |
+ case 1: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ); |
+ } |
+ break; |
+ } |
+ case 2: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0; |
+ } |
+ break; |
+ } |
+ } |
+ set_q_register(Vd, src1); |
+ } else if (instr->Bits(21, 20) == 1 && instr->Bits(11, 8) == 1 && |
+ instr->Bit(4) == 1) { |
+ // vbsl.size Qd, Qm, Qn. |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ uint32_t dst[4], src1[4], src2[4]; |
+ get_q_register(Vd, dst); |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
+ } |
+ set_q_register(Vd, dst); |
+ } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && |
+ instr->Bit(4) == 1) { |
if (instr->Bit(6) == 0) { |
// veor Dd, Dn, Dm |
int Vd = instr->VFPDRegValue(kDoublePrecision); |
@@ -3829,6 +4035,35 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
e++; |
} |
set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
+ } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB && |
+ instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 && |
+ instr->Bit(4) == 0) { |
+ // vcvt.<Td>.<Tm> Qd, Qm. |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ uint32_t q_data[4]; |
+ get_q_register(Vm, q_data); |
+ float* as_float = reinterpret_cast<float*>(q_data); |
Rodolph Perfetta (ARM)
2016/12/08 18:08:28
undefined
bbudge
2016/12/10 21:33:04
Removed.
|
+ int32_t* as_int = reinterpret_cast<int32_t*>(q_data); |
+ uint32_t* as_uint = reinterpret_cast<uint32_t*>(q_data); |
Rodolph Perfetta (ARM)
2016/12/08 18:08:28
cast not needed.
bbudge
2016/12/10 21:33:04
Done.
|
+ int op = instr->Bits(8, 7); |
+ for (int i = 0; i < 4; i++) { |
+ switch (op) { |
+ case 0: // s32 -> f32 |
+ as_float[i] = static_cast<float>(as_int[i]); // round towards 0. |
+ break; |
+ case 1: // u32 -> f32 |
+ as_float[i] = static_cast<float>(as_uint[i]); // round towards 0. |
+ break; |
+ case 2: // f32 -> s32 |
+ as_int[i] = static_cast<int32_t>(as_float[i]); |
+ break; |
+ case 3: // f32 -> u32 |
+ as_uint[i] = static_cast<uint32_t>(as_float[i]); |
Rodolph Perfetta (ARM)
2016/12/08 18:08:28
this is undefined if the float value is negative.
bbudge
2016/12/10 21:33:04
I've pulled a helper method out of the existing VC
|
+ break; |
+ } |
+ } |
+ set_q_register(Vd, q_data); |
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) && |
(instr->Bit(4) == 0)) { |
if (instr->Bit(6) == 0) { |
@@ -3850,6 +4085,49 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
set_q_register(vm, dval); |
set_q_register(vd, mval); |
} |
+ } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 && |
+ instr->Bit(4) == 0x0) { |
+ // vdup.32 Qd, Sm. |
+ int vd = instr->VFPDRegValue(kSimd128Precision); |
+ int vm = instr->VFPMRegValue(kDoublePrecision); |
+ int index = instr->Bit(19); |
+ uint32_t s_data = get_s_register(vm * 2 + index); |
+ uint32_t q_data[4]; |
+ for (int i = 0; i < 4; i++) q_data[i] = s_data; |
+ set_q_register(vd, q_data); |
+ } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 && |
+ instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) { |
+ // vmvn Qd, Qm. |
+ int vd = instr->VFPDRegValue(kSimd128Precision); |
+ int vm = instr->VFPMRegValue(kSimd128Precision); |
+ uint32_t q_data[4]; |
+ get_q_register(vm, q_data); |
+ for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
+ set_q_register(vd, q_data); |
+ } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 && |
+ instr->Bit(4) == 0x0) { |
+ // vtb[l,x] Dd, <list>, Dm. |
+ int vd = instr->VFPDRegValue(kDoublePrecision); |
+ int vn = instr->VFPNRegValue(kDoublePrecision); |
+ int vm = instr->VFPMRegValue(kDoublePrecision); |
+ int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
+ bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
+ uint64_t destination = 0, indices = 0, result = 0; |
+ get_d_register(vd, &destination); |
+ get_d_register(vm, &indices); |
+ for (int i = 0; i < kDoubleSize; i++) { |
+ int shift = i * kBitsPerByte; |
+ int index = (indices >> shift) & 0xFF; |
+ if (index < table_len) { |
+ uint64_t table; |
+ get_d_register(vn + index / kDoubleSize, &table); |
+ result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
+ << shift; |
+ } else if (vtbx) { |
+ result |= destination & (0xFFull << shift); |
+ } |
+ } |
+ set_d_register(vd, &result); |
} else { |
UNIMPLEMENTED(); |
} |