| Index: src/arm/simulator-arm.cc
|
| diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
|
| index 12654834f2877b53738ea63170819206375050ad..88720108f360b3979de8c786c44583afbe7a76bb 100644
|
| --- a/src/arm/simulator-arm.cc
|
| +++ b/src/arm/simulator-arm.cc
|
| @@ -3335,7 +3335,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
|
| break;
|
| }
|
| case Neon16: {
|
| - // Perform pairwise ops instead of casting to uint16_t.
|
| + // Replicate the low 16 bits of rt_value into both halves of a 32-bit word.
|
| rt_value &= 0xFFFFu;
|
| uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu);
|
| for (int i = 0; i < 4; i++) {
|
| @@ -3838,17 +3838,6 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
|
| }
|
| }
|
|
|
| -#define HIGH_16(x) ((x) >> 16)
|
| -#define LOW_16(x) ((x)&0xFFFFu)
|
| -#define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu)
|
| -#define PAIRWISE_OP(x, y, OP) \
|
| - COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y))))
|
| -
|
| -#define ADD_16(x, y) ((x) + (y))
|
| -#define SUB_16(x, y) ((x) - (y))
|
| -#define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0)
|
| -#define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0)
|
| -
|
| void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| switch (instr->SpecialValue()) {
|
| case 4:
|
| @@ -3881,9 +3870,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| break;
|
| }
|
| case Neon16: {
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16);
|
| + uint16_t s1[8], s2[8];
|
| + memcpy(s1, src1, sizeof(s1));
|
| + memcpy(s2, src2, sizeof(s2));
|
| + for (int i = 0; i < 8; i++) {
|
| + s1[i] += s2[i];
|
| }
|
| + memcpy(src1, s1, sizeof(src1));
|
| break;
|
| }
|
| case Neon32: {
|
| @@ -3908,9 +3901,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| break;
|
| }
|
| case Neon16: {
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16);
|
| + uint16_t s1[8], s2[8];
|
| + memcpy(s1, src1, sizeof(s1));
|
| + memcpy(s2, src2, sizeof(s2));
|
| + for (int i = 0; i < 8; i++) {
|
| + s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFFFu : 0;
|
| }
|
| + memcpy(src1, s1, sizeof(src1));
|
| break;
|
| }
|
| case Neon32: {
|
| @@ -3945,6 +3942,46 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| }
|
| }
|
| set_q_register(Vd, src1);
|
| + } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 &&
|
| + instr->Bit(4) == 1) {
|
| + // vmul.i<size> Qd, Qn, Qm.
|
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + int Vn = instr->VFPNRegValue(kSimd128Precision);
|
| + uint32_t src1[4], src2[4];
|
| + get_q_register(Vn, src1);
|
| + get_q_register(Vm, src2);
|
| + switch (size) {
|
| + case Neon8: {
|
| + uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
|
| + uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
|
| + for (int i = 0; i < 16; i++) {
|
| + s1[i] *= s2[i];
|
| + }
|
| + break;
|
| + }
|
| + case Neon16: {
|
| + uint16_t s1[8], s2[8];
|
| + memcpy(s1, src1, sizeof(s1));
|
| + memcpy(s2, src2, sizeof(s2));
|
| + for (int i = 0; i < 8; i++) {
|
| + s1[i] *= s2[i];
|
| + }
|
| + memcpy(src1, s1, sizeof(src1));
|
| + break;
|
| + }
|
| + case Neon32: {
|
| + for (int i = 0; i < 4; i++) {
|
| + src1[i] *= src2[i];
|
| + }
|
| + break;
|
| + }
|
| + default:
|
| + UNIMPLEMENTED();
|
| + break;
|
| + }
|
| + set_q_register(Vd, src1);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -3969,6 +4006,27 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| e++;
|
| }
|
| set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
|
| + } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
|
| + // vext.8 Qd, Qn, Qm, #imm4.
|
| + int imm4 = instr->Bits(11, 8);
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + int Vn = instr->VFPNRegValue(kSimd128Precision);
|
| + uint32_t src1[4], src2[4], dst[4];
|
| + get_q_register(Vn, src1);
|
| + get_q_register(Vm, src2);
|
| + uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
|
| + uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
|
| + uint8_t* d = reinterpret_cast<uint8_t*>(dst);
|
| + int boundary = 16 - imm4;
|
| + int i = 0;
|
| + for (; i < boundary; i++) {
|
| + d[i] = s1[i + imm4];
|
| + }
|
| + for (; i < 16; i++) {
|
| + d[i] = s2[i - boundary];
|
| + }
|
| + set_q_register(Vd, dst);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -3993,9 +4051,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| break;
|
| }
|
| case Neon16: {
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16);
|
| + uint16_t s1[8], s2[8];
|
| + memcpy(s1, src1, sizeof(s1));
|
| + memcpy(s2, src2, sizeof(s2));
|
| + for (int i = 0; i < 8; i++) {
|
| + s1[i] -= s2[i];
|
| }
|
| + memcpy(src1, s1, sizeof(src1));
|
| break;
|
| }
|
| case Neon32: {
|
| @@ -4028,9 +4090,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| break;
|
| }
|
| case Neon16: {
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16);
|
| + uint16_t s1[8], s2[8];
|
| + memcpy(s1, src1, sizeof(s1));
|
| + memcpy(s2, src2, sizeof(s2));
|
| + for (int i = 0; i < 8; i++) {
|
| + s1[i] = s1[i] == s2[i] ? 0xffffu : 0;
|
| }
|
| + memcpy(src1, s1, sizeof(src1));
|
| break;
|
| }
|
| case Neon32: {
|
| @@ -4065,23 +4131,37 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| int Vd = instr->VFPDRegValue(kDoublePrecision);
|
| int Vn = instr->VFPNRegValue(kDoublePrecision);
|
| int Vm = instr->VFPMRegValue(kDoublePrecision);
|
| - uint64_t n_data, m_data;
|
| - get_d_register(Vn, &n_data);
|
| - get_d_register(Vm, &m_data);
|
| - n_data ^= m_data;
|
| - set_d_register(Vd, &n_data);
|
| + uint64_t src1, src2;
|
| + get_d_register(Vn, &src1);
|
| + get_d_register(Vm, &src2);
|
| + src1 ^= src2;
|
| + set_d_register(Vd, &src1);
|
|
|
| } else {
|
| // veor Qd, Qn, Qm
|
| int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| int Vn = instr->VFPNRegValue(kSimd128Precision);
|
| int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| - uint32_t n_data[4], m_data[4];
|
| - get_q_register(Vn, n_data);
|
| - get_q_register(Vm, m_data);
|
| - for (int i = 0; i < 4; i++) n_data[i] ^= m_data[i];
|
| - set_q_register(Vd, n_data);
|
| + uint32_t src1[4], src2[4];
|
| + get_q_register(Vn, src1);
|
| + get_q_register(Vm, src2);
|
| + for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
|
| + set_q_register(Vd, src1);
|
| }
|
| + } else if (instr->Bit(21) == 0 && instr->Bits(11, 8) == 0xd &&
|
| + instr->Bit(6) == 1 && instr->Bit(4) == 1) {
|
| + // vmul.f32 Qd, Qn, Qm
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vn = instr->VFPNRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + uint32_t src1[4], src2[4];
|
| + get_q_register(Vn, src1);
|
| + get_q_register(Vm, src2);
|
| + for (int i = 0; i < 4; i++) {
|
| + src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) *
|
| + bit_cast<float>(src2[i]));
|
| + }
|
| + set_q_register(Vd, src1);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -4106,106 +4186,314 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| e++;
|
| }
|
| set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
|
| - } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB &&
|
| - instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 &&
|
| - instr->Bit(4) == 0) {
|
| - // vcvt.<Td>.<Tm> Qd, Qm.
|
| - int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| - int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| - uint32_t q_data[4];
|
| - get_q_register(Vm, q_data);
|
| - int op = instr->Bits(8, 7);
|
| - for (int i = 0; i < 4; i++) {
|
| + } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
|
| + if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
|
| + instr->Bit(6) == 1) {
|
| + // vcvt.<Td>.<Tm> Qd, Qm.
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + uint32_t q_data[4];
|
| + get_q_register(Vm, q_data);
|
| + int op = instr->Bits(8, 7);
|
| + for (int i = 0; i < 4; i++) {
|
| + switch (op) {
|
| + case 0:
|
| + // f32 <- s32, round towards nearest.
|
| + q_data[i] = bit_cast<uint32_t>(std::round(
|
| + static_cast<float>(bit_cast<int32_t>(q_data[i]))));
|
| + break;
|
| + case 1:
|
| + // f32 <- u32, round towards nearest.
|
| + q_data[i] = bit_cast<uint32_t>(
|
| + std::round(static_cast<float>(q_data[i])));
|
| + break;
|
| + case 2:
|
| + // s32 <- f32, round to zero.
|
| + q_data[i] = static_cast<uint32_t>(
|
| + ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
|
| + break;
|
| + case 3:
|
| + // u32 <- f32, round to zero.
|
| + q_data[i] = static_cast<uint32_t>(
|
| + ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
|
| + break;
|
| + }
|
| + }
|
| + set_q_register(Vd, q_data);
|
| + } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
|
| + if (instr->Bit(6) == 0) {
|
| + // vswp Dd, Dm.
|
| + uint64_t dval, mval;
|
| + int vd = instr->VFPDRegValue(kDoublePrecision);
|
| + int vm = instr->VFPMRegValue(kDoublePrecision);
|
| + get_d_register(vd, &dval);
|
| + get_d_register(vm, &mval);
|
| + set_d_register(vm, &dval);
|
| + set_d_register(vd, &mval);
|
| + } else {
|
| + // vswp Qd, Qm.
|
| + uint32_t dval[4], mval[4];
|
| + int vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int vm = instr->VFPMRegValue(kSimd128Precision);
|
| + get_q_register(vd, dval);
|
| + get_q_register(vm, mval);
|
| + set_q_register(vm, dval);
|
| + set_q_register(vd, mval);
|
| + }
|
| + } else if (instr->Bits(11, 7) == 0x18) {
|
| + // vdup.32 Qd, Sm.
|
| + int vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int vm = instr->VFPMRegValue(kDoublePrecision);
|
| + int index = instr->Bit(19);
|
| + uint32_t s_data = get_s_register(vm * 2 + index);
|
| + uint32_t q_data[4];
|
| + for (int i = 0; i < 4; i++) q_data[i] = s_data;
|
| + set_q_register(vd, q_data);
|
| + } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
|
| + // vmvn Qd, Qm.
|
| + int vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int vm = instr->VFPMRegValue(kSimd128Precision);
|
| + uint32_t q_data[4];
|
| + get_q_register(vm, q_data);
|
| + for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
|
| + set_q_register(vd, q_data);
|
| + } else if (instr->Bits(11, 10) == 0x2) {
|
| + // vtb[l,x] Dd, <list>, Dm.
|
| + int vd = instr->VFPDRegValue(kDoublePrecision);
|
| + int vn = instr->VFPNRegValue(kDoublePrecision);
|
| + int vm = instr->VFPMRegValue(kDoublePrecision);
|
| + int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
|
| + bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
|
| + uint64_t destination = 0, indices = 0, result = 0;
|
| + get_d_register(vd, &destination);
|
| + get_d_register(vm, &indices);
|
| + for (int i = 0; i < kDoubleSize; i++) {
|
| + int shift = i * kBitsPerByte;
|
| + int index = (indices >> shift) & 0xFF;
|
| + if (index < table_len) {
|
| + uint64_t table;
|
| + get_d_register(vn + index / kDoubleSize, &table);
|
| + result |=
|
| + ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
|
| + << shift;
|
| + } else if (vtbx) {
|
| + result |= destination & (0xFFull << shift);
|
| + }
|
| + }
|
| + set_d_register(vd, &result);
|
| + } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) {
|
| + // vzip.<size> Qd, Qm.
|
| + int size = static_cast<NeonSize>(instr->Bits(19, 18));
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + uint32_t src1[4], src2[4], dst1[4], dst2[4];
|
| + get_q_register(Vd, src1);
|
| + get_q_register(Vm, src2);
|
| + switch (size) {
|
| + case Neon8: {
|
| + uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
|
| + uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
|
| + uint8_t* d1 = reinterpret_cast<uint8_t*>(dst1);
|
| + uint8_t* d2 = reinterpret_cast<uint8_t*>(dst2);
|
| + for (int i = 0; i < 8; i++) {
|
| + d1[i * 2] = s1[i];
|
| + d1[i * 2 + 1] = s2[i];
|
| + d2[i * 2] = s1[i + 8];
|
| + d2[i * 2 + 1] = s2[i + 8];
|
| + }
|
| + break;
|
| + }
|
| + case Neon16: {
|
| + uint16_t s1[8], s2[8], d1[8], d2[8];
|
| + memcpy(s1, src1, sizeof(s1));
|
| + memcpy(s2, src2, sizeof(s2));
|
| + for (int i = 0; i < 8; i += 2) {
|
| + d1[i] = s1[i / 2];
|
| + d1[i + 1] = s2[i / 2];
|
| + d2[i] = s1[i / 2 + 4];
|
| + d2[i + 1] = s2[i / 2 + 4];
|
| + }
|
| + memcpy(dst1, d1, sizeof(dst1));
|
| + memcpy(dst2, d2, sizeof(dst2));
|
| + break;
|
| + }
|
| + case Neon32: {
|
| + for (int i = 0; i < 2; i++) {
|
| + dst1[i * 2] = src1[i];
|
| + dst1[i * 2 + 1] = src2[i];
|
| + dst2[i * 2] = src1[i + 2];
|
| + dst2[i * 2 + 1] = src2[i + 2];
|
| + }
|
| + break;
|
| + }
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| + }
|
| + set_q_register(Vd, dst1);
|
| + set_q_register(Vm, dst2);
|
| + } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
|
| + // vrev<op>.size Qd, Qm
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + int size = static_cast<NeonSize>(instr->Bits(19, 18));
|
| + NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
|
| + instr->Bits(8, 7));
|
| + uint32_t src[4];
|
| + get_q_register(Vm, src);
|
| switch (op) {
|
| - case 0:
|
| - // f32 <- s32, round towards nearest.
|
| - q_data[i] = bit_cast<uint32_t>(
|
| - std::round(static_cast<float>(bit_cast<int32_t>(q_data[i]))));
|
| + case Neon16: {
|
| + DCHECK_EQ(Neon8, size);
|
| + uint8_t* s = reinterpret_cast<uint8_t*>(src);
|
| + for (int i = 0; i < 16; i += 2) {
|
| + std::swap(s[i], s[i + 1]);
|
| + }
|
| break;
|
| - case 1:
|
| - // f32 <- u32, round towards nearest.
|
| - q_data[i] =
|
| - bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i])));
|
| + }
|
| + case Neon32: {
|
| + switch (size) {
|
| + case Neon16:
|
| + for (int i = 0; i < 4; i++) {
|
| + src[i] = (src[i] >> 16) | (src[i] << 16);
|
| + }
|
| + break;
|
| + case Neon8: {
|
| + uint8_t* s = reinterpret_cast<uint8_t*>(src);
|
| + for (int i = 0; i < 4; i++) {
|
| + std::swap(s[i * 4], s[i * 4 + 3]);
|
| + std::swap(s[i * 4 + 1], s[i * 4 + 2]);
|
| + }
|
| + break;
|
| + }
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| + }
|
| break;
|
| - case 2:
|
| - // s32 <- f32, round to zero.
|
| - q_data[i] = static_cast<uint32_t>(
|
| - ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
|
| + }
|
| + case Neon64: {
|
| + switch (size) {
|
| + case Neon32: {
|
| + std::swap(src[0], src[1]);
|
| + std::swap(src[2], src[3]);
|
| + break;
|
| + }
|
| + case Neon16: {
|
| + for (int i = 0; i <= 2; i += 2) {
|
| + uint32_t w1 = src[i];
|
| + uint32_t w2 = src[i + 1];
|
| + src[i] = (w2 >> 16) | (w2 << 16);
|
| + src[i + 1] = (w1 >> 16) | (w1 << 16);
|
| + }
|
| + break;
|
| + }
|
| + case Neon8: {
|
| + uint8_t* s = reinterpret_cast<uint8_t*>(src);
|
| + for (int i = 0; i < 4; i++) {
|
| + std::swap(s[i], s[7 - i]);
|
| + std::swap(s[i + 8], s[15 - i]);
|
| + }
|
| + break;
|
| + }
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| + }
|
| break;
|
| - case 3:
|
| - // u32 <- f32, round to zero.
|
| - q_data[i] = static_cast<uint32_t>(
|
| - ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
|
| + }
|
| + default:
|
| + UNREACHABLE();
|
| break;
|
| }
|
| - }
|
| - set_q_register(Vd, q_data);
|
| - } else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
|
| - (instr->Bit(4) == 0)) {
|
| - if (instr->Bit(6) == 0) {
|
| - // vswp Dd, Dm.
|
| - uint64_t dval, mval;
|
| - int vd = instr->VFPDRegValue(kDoublePrecision);
|
| - int vm = instr->VFPMRegValue(kDoublePrecision);
|
| - get_d_register(vd, &dval);
|
| - get_d_register(vm, &mval);
|
| - set_d_register(vm, &dval);
|
| - set_d_register(vd, &mval);
|
| - } else {
|
| - // vswp Qd, Qm.
|
| - uint32_t dval[4], mval[4];
|
| - int vd = instr->VFPDRegValue(kSimd128Precision);
|
| - int vm = instr->VFPMRegValue(kSimd128Precision);
|
| - get_q_register(vd, dval);
|
| - get_q_register(vm, mval);
|
| - set_q_register(vm, dval);
|
| - set_q_register(vd, mval);
|
| - }
|
| - } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 &&
|
| - instr->Bit(4) == 0x0) {
|
| - // vdup.32 Qd, Sm.
|
| - int vd = instr->VFPDRegValue(kSimd128Precision);
|
| - int vm = instr->VFPMRegValue(kDoublePrecision);
|
| - int index = instr->Bit(19);
|
| - uint32_t s_data = get_s_register(vm * 2 + index);
|
| - uint32_t q_data[4];
|
| - for (int i = 0; i < 4; i++) q_data[i] = s_data;
|
| - set_q_register(vd, q_data);
|
| - } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 &&
|
| - instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) {
|
| - // vmvn Qd, Qm.
|
| - int vd = instr->VFPDRegValue(kSimd128Precision);
|
| - int vm = instr->VFPMRegValue(kSimd128Precision);
|
| - uint32_t q_data[4];
|
| - get_q_register(vm, q_data);
|
| - for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
|
| - set_q_register(vd, q_data);
|
| - } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 &&
|
| - instr->Bit(4) == 0x0) {
|
| - // vtb[l,x] Dd, <list>, Dm.
|
| - int vd = instr->VFPDRegValue(kDoublePrecision);
|
| - int vn = instr->VFPNRegValue(kDoublePrecision);
|
| - int vm = instr->VFPMRegValue(kDoublePrecision);
|
| - int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
|
| - bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
|
| - uint64_t destination = 0, indices = 0, result = 0;
|
| - get_d_register(vd, &destination);
|
| - get_d_register(vm, &indices);
|
| - for (int i = 0; i < kDoubleSize; i++) {
|
| - int shift = i * kBitsPerByte;
|
| - int index = (indices >> shift) & 0xFF;
|
| - if (index < table_len) {
|
| - uint64_t table;
|
| - get_d_register(vn + index / kDoubleSize, &table);
|
| - result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
|
| - << shift;
|
| - } else if (vtbx) {
|
| - result |= destination & (0xFFull << shift);
|
| + set_q_register(Vd, src);
|
| + } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + int size = static_cast<NeonSize>(instr->Bits(19, 18));
|
| + uint32_t src[4];
|
| + get_q_register(Vm, src);
|
| + if (instr->Bits(9, 6) == 0xd) {
|
| + // vabs<type>.<size> Qd, Qm
|
| + if (instr->Bit(10) != 0) {
|
| + // floating point (clear sign bits)
|
| + for (int i = 0; i < 4; i++) {
|
| + src[i] &= ~0x80000000;
|
| + }
|
| + } else {
|
| + // signed integer
|
| + switch (size) {
|
| + case Neon8: {
|
| + int8_t* s = reinterpret_cast<int8_t*>(src);
|
| + for (int i = 0; i < 16; i++) {
|
| + s[i] = std::abs(s[i]);
|
| + }
|
| + break;
|
| + }
|
| + case Neon16: {
|
| + int16_t s[8];
|
| + memcpy(s, src, sizeof(s));
|
| + for (int i = 0; i < 8; i++) {
|
| + s[i] = std::abs(s[i]);
|
| + }
|
| + memcpy(src, s, sizeof(src));
|
| + break;
|
| + }
|
| + case Neon32: {
|
| + int32_t* as_signed = reinterpret_cast<int32_t*>(src);
|
| + for (int i = 0; i < 4; i++) {
|
| + as_signed[i] = std::abs(as_signed[i]);
|
| + }
|
| + break;
|
| + }
|
| + default:
|
| + UNIMPLEMENTED();
|
| + break;
|
| + }
|
| + }
|
| + } else if (instr->Bits(9, 6) == 0xf) {
|
| + // vneg<type>.<size> Qd, Qm
|
| + if (instr->Bit(10) != 0) {
|
| + // floating point (toggle sign bits)
|
| + for (int i = 0; i < 4; i++) {
|
| + src[i] ^= 0x80000000;
|
| + }
|
| + } else {
|
| + // signed integer
|
| + switch (size) {
|
| + case Neon8: {
|
| + int8_t* s = reinterpret_cast<int8_t*>(src);
|
| + for (int i = 0; i < 16; i++) {
|
| + s[i] = -s[i];
|
| + }
|
| + break;
|
| + }
|
| + case Neon16:
|
| + int16_t s[8];
|
| + memcpy(s, src, sizeof(s));
|
| + for (int i = 0; i < 8; i++) {
|
| + s[i] = -s[i];
|
| + }
|
| + memcpy(src, s, sizeof(src));
|
| + break;
|
| + case Neon32: {
|
| + int32_t* as_signed = reinterpret_cast<int32_t*>(src);
|
| + for (int i = 0; i < 4; i++) {
|
| + as_signed[i] = -as_signed[i];
|
| + }
|
| + break;
|
| + }
|
| + default:
|
| + UNIMPLEMENTED();
|
| + break;
|
| + }
|
| + }
|
| + } else {
|
| + UNIMPLEMENTED();
|
| }
|
| + set_q_register(Vd, src);
|
| + } else {
|
| + UNIMPLEMENTED();
|
| }
|
| - set_d_register(vd, &result);
|
| - } else {
|
| - UNIMPLEMENTED();
|
| }
|
| break;
|
| case 8:
|
|
|