Index: src/arm/simulator-arm.cc |
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
index 12654834f2877b53738ea63170819206375050ad..88720108f360b3979de8c786c44583afbe7a76bb 100644 |
--- a/src/arm/simulator-arm.cc |
+++ b/src/arm/simulator-arm.cc |
@@ -3335,7 +3335,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { |
break; |
} |
case Neon16: { |
- // Perform pairwise ops instead of casting to uint16_t. |
+ // Duplicate the 16-bit value into both halves of each 32-bit word. |
rt_value &= 0xFFFFu; |
uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu); |
for (int i = 0; i < 4; i++) { |
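Illustration (not part of the patch): the Neon16 case here builds one 32-bit word with the 16-bit value in both halves and writes that word to all four words of the quadword register, giving eight identical lanes. A minimal standalone C++ sketch of the same trick:

    // Illustrative sketch, not simulator code.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t rt_value = 0xABCD34A5u;  // incoming core-register value
      rt_value &= 0xFFFFu;              // keep the low 16 bits: 0x34A5
      uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu);  // 0x34A534A5
      uint32_t q[4];
      for (int i = 0; i < 4; i++) q[i] = rt_rt;  // eight 16-bit lanes of 0x34A5
      for (int i = 0; i < 4; i++) std::printf("%08X ", q[i]);
      std::printf("\n");
      return 0;
    }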
@@ -3838,17 +3838,6 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { |
} |
} |
-#define HIGH_16(x) ((x) >> 16) |
-#define LOW_16(x) ((x)&0xFFFFu) |
-#define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu) |
-#define PAIRWISE_OP(x, y, OP) \ |
- COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y)))) |
- |
-#define ADD_16(x, y) ((x) + (y)) |
-#define SUB_16(x, y) ((x) - (y)) |
-#define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0) |
-#define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0) |
- |
void Simulator::DecodeSpecialCondition(Instruction* instr) { |
switch (instr->SpecialValue()) { |
case 4: |
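Note (outside the patch): the deleted PAIRWISE_OP macros handled 16-bit halves with shifts and masks; the hunks below replace them by copying the four 32-bit words into an explicit uint16_t lane array, operating per lane, and copying the lanes back. A standalone sketch of that pattern for a 16-bit lane add (AddLanes16 is illustrative only, not a simulator function; it assumes a little-endian host, which the patch's own memcpy pattern also relies on):

    // Illustrative sketch, not simulator code.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    void AddLanes16(uint32_t dst[4], const uint32_t src[4]) {
      uint16_t d[8], s[8];
      std::memcpy(d, dst, sizeof(d));
      std::memcpy(s, src, sizeof(s));
      for (int i = 0; i < 8; i++) d[i] += s[i];  // each lane wraps modulo 2^16
      std::memcpy(dst, d, sizeof(d));
    }

    int main() {
      uint32_t a[4] = {0x0001FFFFu, 0, 0, 0};  // lanes 0,1 = 0xFFFF, 0x0001
      uint32_t b[4] = {0x00010001u, 0, 0, 0};  // lanes 0,1 = 0x0001, 0x0001
      AddLanes16(a, b);
      std::printf("%08X\n", a[0]);  // 00020000: lane 0 wrapped, lane 1 = 2
      return 0;
    }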
@@ -3881,9 +3870,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
break; |
} |
case Neon16: { |
- for (int i = 0; i < 4; i++) { |
- src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16); |
+ uint16_t s1[8], s2[8]; |
+ memcpy(s1, src1, sizeof(s1)); |
+ memcpy(s2, src2, sizeof(s2)); |
+ for (int i = 0; i < 8; i++) { |
+ s1[i] += s2[i]; |
} |
+ memcpy(src1, s1, sizeof(src1)); |
break; |
} |
case Neon32: { |
@@ -3908,9 +3901,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
break; |
} |
case Neon16: { |
- for (int i = 0; i < 4; i++) { |
- src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16); |
+ uint16_t s1[8], s2[8]; |
+ memcpy(s1, src1, sizeof(s1)); |
+ memcpy(s2, src2, sizeof(s2)); |
+ for (int i = 0; i < 8; i++) { |
+ s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFFFu : 0; |
} |
+ memcpy(src1, s1, sizeof(src1)); |
break; |
} |
case Neon32: { |
@@ -3945,6 +3942,46 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
} |
} |
set_q_register(Vd, src1); |
+ } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 && |
+ instr->Bit(4) == 1) { |
+ // vmul.i<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
+ for (int i = 0; i < 16; i++) { |
+ s1[i] *= s2[i]; |
+ } |
+ break; |
+ } |
+ case Neon16: { |
+ uint16_t s1[8], s2[8]; |
+ memcpy(s1, src1, sizeof(s1)); |
+ memcpy(s2, src2, sizeof(s2)); |
+ for (int i = 0; i < 8; i++) { |
+ s1[i] *= s2[i]; |
+ } |
+ memcpy(src1, s1, sizeof(src1)); |
+ break; |
+ } |
+ case Neon32: { |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] *= src2[i]; |
+ } |
+ break; |
+ } |
+ default: |
+ UNIMPLEMENTED(); |
+ break; |
+ } |
+ set_q_register(Vd, src1); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -3969,6 +4006,27 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
e++; |
} |
set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
+ } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { |
+ // vext.8 Qd, Qn, Qm, #imm4. |
+ int imm4 = instr->Bits(11, 8); |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4], dst[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
+ uint8_t* d = reinterpret_cast<uint8_t*>(dst); |
+ int boundary = 16 - imm4; |
+ int i = 0; |
+ for (; i < boundary; i++) { |
+ d[i] = s1[i + imm4]; |
+ } |
+ for (; i < 16; i++) { |
+ d[i] = s2[i - boundary]; |
+ } |
+ set_q_register(Vd, dst); |
} else { |
UNIMPLEMENTED(); |
} |
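For reference (not applied by the patch): vext selects 16 consecutive bytes from the 32-byte concatenation of the two source registers, starting at byte imm4, which is what the boundary split above implements. A standalone model (the Vext helper is illustrative only, assuming little-endian lane order):

    // Illustrative sketch, not simulator code.
    #include <cstdint>
    #include <cstdio>

    // d = bytes imm4 .. imm4+15 of {n[0..15], m[0..15]}, imm4 in [0, 15].
    void Vext(uint8_t d[16], const uint8_t n[16], const uint8_t m[16],
              int imm4) {
      int boundary = 16 - imm4;
      for (int i = 0; i < boundary; i++) d[i] = n[i + imm4];
      for (int i = boundary; i < 16; i++) d[i] = m[i - boundary];
    }

    int main() {
      uint8_t n[16], m[16], d[16];
      for (int i = 0; i < 16; i++) {
        n[i] = static_cast<uint8_t>(i);
        m[i] = static_cast<uint8_t>(0x10 + i);
      }
      Vext(d, n, m, 3);
      for (int i = 0; i < 16; i++) std::printf("%02X ", d[i]);  // 03 .. 0F 10 11 12
      std::printf("\n");
      return 0;
    }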
@@ -3993,9 +4051,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
break; |
} |
case Neon16: { |
- for (int i = 0; i < 4; i++) { |
- src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16); |
+ uint16_t s1[8], s2[8]; |
+ memcpy(s1, src1, sizeof(s1)); |
+ memcpy(s2, src2, sizeof(s2)); |
+ for (int i = 0; i < 8; i++) { |
+ s1[i] -= s2[i]; |
} |
+ memcpy(src1, s1, sizeof(src1)); |
break; |
} |
case Neon32: { |
@@ -4028,9 +4090,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
break; |
} |
case Neon16: { |
- for (int i = 0; i < 4; i++) { |
- src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16); |
+ uint16_t s1[8], s2[8]; |
+ memcpy(s1, src1, sizeof(s1)); |
+ memcpy(s2, src2, sizeof(s2)); |
+ for (int i = 0; i < 8; i++) { |
+ s1[i] = s1[i] == s2[i] ? 0xFFFFu : 0; |
} |
+ memcpy(src1, s1, sizeof(src1)); |
break; |
} |
case Neon32: { |
@@ -4065,23 +4131,37 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
int Vd = instr->VFPDRegValue(kDoublePrecision); |
int Vn = instr->VFPNRegValue(kDoublePrecision); |
int Vm = instr->VFPMRegValue(kDoublePrecision); |
- uint64_t n_data, m_data; |
- get_d_register(Vn, &n_data); |
- get_d_register(Vm, &m_data); |
- n_data ^= m_data; |
- set_d_register(Vd, &n_data); |
+ uint64_t src1, src2; |
+ get_d_register(Vn, &src1); |
+ get_d_register(Vm, &src2); |
+ src1 ^= src2; |
+ set_d_register(Vd, &src1); |
} else { |
// veor Qd, Qn, Qm |
int Vd = instr->VFPDRegValue(kSimd128Precision); |
int Vn = instr->VFPNRegValue(kSimd128Precision); |
int Vm = instr->VFPMRegValue(kSimd128Precision); |
- uint32_t n_data[4], m_data[4]; |
- get_q_register(Vn, n_data); |
- get_q_register(Vm, m_data); |
- for (int i = 0; i < 4; i++) n_data[i] ^= m_data[i]; |
- set_q_register(Vd, n_data); |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
+ set_q_register(Vd, src1); |
} |
+ } else if (instr->Bit(21) == 0 && instr->Bits(11, 8) == 0xd && |
+ instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
+ // vmul.f32 Qd, Qn, Qm |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vn = instr->VFPNRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) * |
+ bit_cast<float>(src2[i])); |
+ } |
+ set_q_register(Vd, src1); |
} else { |
UNIMPLEMENTED(); |
} |
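Aside (not part of the patch): bit_cast is V8's internal helper for reinterpreting the 32-bit lane bits as a float and back without changing them. A standalone equivalent of the vmul.f32 lane operation using memcpy (MulF32Lane is illustrative only):

    // Illustrative sketch, not simulator code.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    uint32_t MulF32Lane(uint32_t a_bits, uint32_t b_bits) {
      float a, b;
      std::memcpy(&a, &a_bits, sizeof(a));
      std::memcpy(&b, &b_bits, sizeof(b));
      float r = a * b;
      uint32_t r_bits;
      std::memcpy(&r_bits, &r, sizeof(r_bits));
      return r_bits;
    }

    int main() {
      // 2.0f * 0.5f == 1.0f: 0x40000000 * 0x3F000000 -> 0x3F800000.
      std::printf("%08X\n", MulF32Lane(0x40000000u, 0x3F000000u));
      return 0;
    }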
@@ -4106,106 +4186,314 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
e++; |
} |
set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
- } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB && |
- instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 && |
- instr->Bit(4) == 0) { |
- // vcvt.<Td>.<Tm> Qd, Qm. |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- uint32_t q_data[4]; |
- get_q_register(Vm, q_data); |
- int op = instr->Bits(8, 7); |
- for (int i = 0; i < 4; i++) { |
+ } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { |
+ if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && |
+ instr->Bit(6) == 1) { |
+ // vcvt.<Td>.<Tm> Qd, Qm. |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ uint32_t q_data[4]; |
+ get_q_register(Vm, q_data); |
+ int op = instr->Bits(8, 7); |
+ for (int i = 0; i < 4; i++) { |
+ switch (op) { |
+ case 0: |
+ // f32 <- s32, round towards nearest. |
+ q_data[i] = bit_cast<uint32_t>(std::round( |
+ static_cast<float>(bit_cast<int32_t>(q_data[i])))); |
+ break; |
+ case 1: |
+ // f32 <- u32, round towards nearest. |
+ q_data[i] = bit_cast<uint32_t>( |
+ std::round(static_cast<float>(q_data[i]))); |
+ break; |
+ case 2: |
+ // s32 <- f32, round to zero. |
+ q_data[i] = static_cast<uint32_t>( |
+ ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); |
+ break; |
+ case 3: |
+ // u32 <- f32, round to zero. |
+ q_data[i] = static_cast<uint32_t>( |
+ ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); |
+ break; |
+ } |
+ } |
+ set_q_register(Vd, q_data); |
+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { |
+ if (instr->Bit(6) == 0) { |
+ // vswp Dd, Dm. |
+ uint64_t dval, mval; |
+ int vd = instr->VFPDRegValue(kDoublePrecision); |
+ int vm = instr->VFPMRegValue(kDoublePrecision); |
+ get_d_register(vd, &dval); |
+ get_d_register(vm, &mval); |
+ set_d_register(vm, &dval); |
+ set_d_register(vd, &mval); |
+ } else { |
+ // vswp Qd, Qm. |
+ uint32_t dval[4], mval[4]; |
+ int vd = instr->VFPDRegValue(kSimd128Precision); |
+ int vm = instr->VFPMRegValue(kSimd128Precision); |
+ get_q_register(vd, dval); |
+ get_q_register(vm, mval); |
+ set_q_register(vm, dval); |
+ set_q_register(vd, mval); |
+ } |
+ } else if (instr->Bits(11, 7) == 0x18) { |
+ // vdup.32 Qd, Sm. |
+ int vd = instr->VFPDRegValue(kSimd128Precision); |
+ int vm = instr->VFPMRegValue(kDoublePrecision); |
+ int index = instr->Bit(19); |
+ uint32_t s_data = get_s_register(vm * 2 + index); |
+ uint32_t q_data[4]; |
+ for (int i = 0; i < 4; i++) q_data[i] = s_data; |
+ set_q_register(vd, q_data); |
+ } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { |
+ // vmvn Qd, Qm. |
+ int vd = instr->VFPDRegValue(kSimd128Precision); |
+ int vm = instr->VFPMRegValue(kSimd128Precision); |
+ uint32_t q_data[4]; |
+ get_q_register(vm, q_data); |
+ for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
+ set_q_register(vd, q_data); |
+ } else if (instr->Bits(11, 10) == 0x2) { |
+ // vtb[l,x] Dd, <list>, Dm. |
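+ // Each byte of Dm indexes into the table formed by the 1-4 consecutive |
+ // d-registers starting at Dn; out-of-range indices produce 0 for vtbl |
+ // and leave the destination byte unchanged for vtbx. |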
+ int vd = instr->VFPDRegValue(kDoublePrecision); |
+ int vn = instr->VFPNRegValue(kDoublePrecision); |
+ int vm = instr->VFPMRegValue(kDoublePrecision); |
+ int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
+ bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
+ uint64_t destination = 0, indices = 0, result = 0; |
+ get_d_register(vd, &destination); |
+ get_d_register(vm, &indices); |
+ for (int i = 0; i < kDoubleSize; i++) { |
+ int shift = i * kBitsPerByte; |
+ int index = (indices >> shift) & 0xFF; |
+ if (index < table_len) { |
+ uint64_t table; |
+ get_d_register(vn + index / kDoubleSize, &table); |
+ result |= |
+ ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
+ << shift; |
+ } else if (vtbx) { |
+ result |= destination & (0xFFull << shift); |
+ } |
+ } |
+ set_d_register(vd, &result); |
+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) { |
+ // vzip.<size> Qd, Qm. |
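+ // Interleaves the lanes of Qd and Qm: Qd receives the interleaved low |
+ // halves and Qm the interleaved high halves. |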
+ int size = static_cast<NeonSize>(instr->Bits(19, 18)); |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
+ get_q_register(Vd, src1); |
+ get_q_register(Vm, src2); |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
+ uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
+ uint8_t* d1 = reinterpret_cast<uint8_t*>(dst1); |
+ uint8_t* d2 = reinterpret_cast<uint8_t*>(dst2); |
+ for (int i = 0; i < 8; i++) { |
+ d1[i * 2] = s1[i]; |
+ d1[i * 2 + 1] = s2[i]; |
+ d2[i * 2] = s1[i + 8]; |
+ d2[i * 2 + 1] = s2[i + 8]; |
+ } |
+ break; |
+ } |
+ case Neon16: { |
+ uint16_t s1[8], s2[8], d1[8], d2[8]; |
+ memcpy(s1, src1, sizeof(s1)); |
+ memcpy(s2, src2, sizeof(s2)); |
+ for (int i = 0; i < 8; i += 2) { |
+ d1[i] = s1[i / 2]; |
+ d1[i + 1] = s2[i / 2]; |
+ d2[i] = s1[i / 2 + 4]; |
+ d2[i + 1] = s2[i / 2 + 4]; |
+ } |
+ memcpy(dst1, d1, sizeof(dst1)); |
+ memcpy(dst2, d2, sizeof(dst2)); |
+ break; |
+ } |
+ case Neon32: { |
+ for (int i = 0; i < 2; i++) { |
+ dst1[i * 2] = src1[i]; |
+ dst1[i * 2 + 1] = src2[i]; |
+ dst2[i * 2] = src1[i + 2]; |
+ dst2[i * 2 + 1] = src2[i + 2]; |
+ } |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
+ set_q_register(Vd, dst1); |
+ set_q_register(Vm, dst2); |
+ } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
+ // vrev<op>.size Qd, Qm |
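+ // Bits(8, 7) selects the region: 0 = vrev64, 1 = vrev32, 2 = vrev16. |
+ // Lanes of the given size are reversed within each region. |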
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int size = static_cast<NeonSize>(instr->Bits(19, 18)); |
+ NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - |
+ instr->Bits(8, 7)); |
+ uint32_t src[4]; |
+ get_q_register(Vm, src); |
switch (op) { |
- case 0: |
- // f32 <- s32, round towards nearest. |
- q_data[i] = bit_cast<uint32_t>( |
- std::round(static_cast<float>(bit_cast<int32_t>(q_data[i])))); |
+ case Neon16: { |
+ DCHECK_EQ(Neon8, size); |
+ uint8_t* s = reinterpret_cast<uint8_t*>(src); |
+ for (int i = 0; i < 16; i += 2) { |
+ std::swap(s[i], s[i + 1]); |
+ } |
break; |
- case 1: |
- // f32 <- u32, round towards nearest. |
- q_data[i] = |
- bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i]))); |
+ } |
+ case Neon32: { |
+ switch (size) { |
+ case Neon16: |
+ for (int i = 0; i < 4; i++) { |
+ src[i] = (src[i] >> 16) | (src[i] << 16); |
+ } |
+ break; |
+ case Neon8: { |
+ uint8_t* s = reinterpret_cast<uint8_t*>(src); |
+ for (int i = 0; i < 4; i++) { |
+ std::swap(s[i * 4], s[i * 4 + 3]); |
+ std::swap(s[i * 4 + 1], s[i * 4 + 2]); |
+ } |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
break; |
- case 2: |
- // s32 <- f32, round to zero. |
- q_data[i] = static_cast<uint32_t>( |
- ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); |
+ } |
+ case Neon64: { |
+ switch (size) { |
+ case Neon32: { |
+ std::swap(src[0], src[1]); |
+ std::swap(src[2], src[3]); |
+ break; |
+ } |
+ case Neon16: { |
+ for (int i = 0; i <= 2; i += 2) { |
+ uint32_t w1 = src[i]; |
+ uint32_t w2 = src[i + 1]; |
+ src[i] = (w2 >> 16) | (w2 << 16); |
+ src[i + 1] = (w1 >> 16) | (w1 << 16); |
+ } |
+ break; |
+ } |
+ case Neon8: { |
+ uint8_t* s = reinterpret_cast<uint8_t*>(src); |
+ for (int i = 0; i < 4; i++) { |
+ std::swap(s[i], s[7 - i]); |
+ std::swap(s[i + 8], s[15 - i]); |
+ } |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
break; |
- case 3: |
- // u32 <- f32, round to zero. |
- q_data[i] = static_cast<uint32_t>( |
- ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); |
+ } |
+ default: |
+ UNREACHABLE(); |
break; |
} |
- } |
- set_q_register(Vd, q_data); |
- } else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) && |
- (instr->Bit(4) == 0)) { |
- if (instr->Bit(6) == 0) { |
- // vswp Dd, Dm. |
- uint64_t dval, mval; |
- int vd = instr->VFPDRegValue(kDoublePrecision); |
- int vm = instr->VFPMRegValue(kDoublePrecision); |
- get_d_register(vd, &dval); |
- get_d_register(vm, &mval); |
- set_d_register(vm, &dval); |
- set_d_register(vd, &mval); |
- } else { |
- // vswp Qd, Qm. |
- uint32_t dval[4], mval[4]; |
- int vd = instr->VFPDRegValue(kSimd128Precision); |
- int vm = instr->VFPMRegValue(kSimd128Precision); |
- get_q_register(vd, dval); |
- get_q_register(vm, mval); |
- set_q_register(vm, dval); |
- set_q_register(vd, mval); |
- } |
- } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 && |
- instr->Bit(4) == 0x0) { |
- // vdup.32 Qd, Sm. |
- int vd = instr->VFPDRegValue(kSimd128Precision); |
- int vm = instr->VFPMRegValue(kDoublePrecision); |
- int index = instr->Bit(19); |
- uint32_t s_data = get_s_register(vm * 2 + index); |
- uint32_t q_data[4]; |
- for (int i = 0; i < 4; i++) q_data[i] = s_data; |
- set_q_register(vd, q_data); |
- } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 && |
- instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) { |
- // vmvn Qd, Qm. |
- int vd = instr->VFPDRegValue(kSimd128Precision); |
- int vm = instr->VFPMRegValue(kSimd128Precision); |
- uint32_t q_data[4]; |
- get_q_register(vm, q_data); |
- for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
- set_q_register(vd, q_data); |
- } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 && |
- instr->Bit(4) == 0x0) { |
- // vtb[l,x] Dd, <list>, Dm. |
- int vd = instr->VFPDRegValue(kDoublePrecision); |
- int vn = instr->VFPNRegValue(kDoublePrecision); |
- int vm = instr->VFPMRegValue(kDoublePrecision); |
- int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
- bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
- uint64_t destination = 0, indices = 0, result = 0; |
- get_d_register(vd, &destination); |
- get_d_register(vm, &indices); |
- for (int i = 0; i < kDoubleSize; i++) { |
- int shift = i * kBitsPerByte; |
- int index = (indices >> shift) & 0xFF; |
- if (index < table_len) { |
- uint64_t table; |
- get_d_register(vn + index / kDoubleSize, &table); |
- result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
- << shift; |
- } else if (vtbx) { |
- result |= destination & (0xFFull << shift); |
+ set_q_register(Vd, src); |
+ } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
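+ // vabs and vneg, integer and floating-point variants. |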
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ int size = static_cast<NeonSize>(instr->Bits(19, 18)); |
+ uint32_t src[4]; |
+ get_q_register(Vm, src); |
+ if (instr->Bits(9, 6) == 0xd) { |
+ // vabs<type>.<size> Qd, Qm |
+ if (instr->Bit(10) != 0) { |
+ // floating point (clear sign bits) |
+ for (int i = 0; i < 4; i++) { |
+ src[i] &= ~0x80000000; |
+ } |
+ } else { |
+ // signed integer |
+ switch (size) { |
+ case Neon8: { |
+ int8_t* s = reinterpret_cast<int8_t*>(src); |
+ for (int i = 0; i < 16; i++) { |
+ s[i] = std::abs(s[i]); |
+ } |
+ break; |
+ } |
+ case Neon16: { |
+ int16_t s[8]; |
+ memcpy(s, src, sizeof(s)); |
+ for (int i = 0; i < 8; i++) { |
+ s[i] = std::abs(s[i]); |
+ } |
+ memcpy(src, s, sizeof(src)); |
+ break; |
+ } |
+ case Neon32: { |
+ int32_t* as_signed = reinterpret_cast<int32_t*>(src); |
+ for (int i = 0; i < 4; i++) { |
+ as_signed[i] = std::abs(as_signed[i]); |
+ } |
+ break; |
+ } |
+ default: |
+ UNIMPLEMENTED(); |
+ break; |
+ } |
+ } |
+ } else if (instr->Bits(9, 6) == 0xf) { |
+ // vneg<type>.<size> Qd, Qm |
+ if (instr->Bit(10) != 0) { |
+ // floating point (toggle sign bits) |
+ for (int i = 0; i < 4; i++) { |
+ src[i] ^= 0x80000000; |
+ } |
+ } else { |
+ // signed integer |
+ switch (size) { |
+ case Neon8: { |
+ int8_t* s = reinterpret_cast<int8_t*>(src); |
+ for (int i = 0; i < 16; i++) { |
+ s[i] = -s[i]; |
+ } |
+ break; |
+ } |
+ case Neon16: { |
+ int16_t s[8]; |
+ memcpy(s, src, sizeof(s)); |
+ for (int i = 0; i < 8; i++) { |
+ s[i] = -s[i]; |
+ } |
+ memcpy(src, s, sizeof(src)); |
+ break; |
+ } |
+ case Neon32: { |
+ int32_t* as_signed = reinterpret_cast<int32_t*>(src); |
+ for (int i = 0; i < 4; i++) { |
+ as_signed[i] = -as_signed[i]; |
+ } |
+ break; |
+ } |
+ default: |
+ UNIMPLEMENTED(); |
+ break; |
+ } |
+ } |
+ } else { |
+ UNIMPLEMENTED(); |
} |
+ set_q_register(Vd, src); |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- set_d_register(vd, &result); |
- } else { |
- UNIMPLEMENTED(); |
} |
break; |
case 8: |