Chromium Code Reviews| Index: src/arm/simulator-arm.cc |
| diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
| index 2bf2a6a3a52b549a548bc384081e975344b63a69..5bac15cbace866b2f66e26e56aea25efd344d8a9 100644 |
| --- a/src/arm/simulator-arm.cc |
| +++ b/src/arm/simulator-arm.cc |
| @@ -5082,108 +5082,207 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| } |
| } |
| set_d_register(vd, &result); |
| - } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && |
| - instr->Bit(6) == 1) { |
| + } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) { |
| NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - if (instr->Bit(7) == 1) { |
| - // vzip.<size> Qd, Qm. |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
| - get_q_register(Vd, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - dst1[i * 2] = src1[i]; |
| - dst1[i * 2 + 1] = src2[i]; |
| - dst2[i * 2] = src1[i + 8]; |
| - dst2[i * 2 + 1] = src2[i + 8]; |
| + if (instr->Bit(6) == 0) { |
| + int Vd = instr->VFPDRegValue(kDoublePrecision); |
| + int Vm = instr->VFPMRegValue(kDoublePrecision); |
| + if (instr->Bit(7) == 1) { |
| + // vzip.<size> Dd, Dm. |
| + switch (size) { |
| + case Neon8: { |
|
martyn.capewell (2017/04/06 13:09:49):
It would be good to factorise the zip and uzp oper… [comment truncated in this view]
bbudge (2017/04/06 17:49:43):
Done. I rewrote the get_ and set_q_register templ… [comment truncated in this view]
|
| + uint8_t src1[8], src2[8], dst1[8], dst2[8]; |
| + get_d_register(Vd, src1); |
| + get_d_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + dst1[i * 2] = src1[i]; |
| + dst1[i * 2 + 1] = src2[i]; |
| + dst2[i * 2] = src1[i + 4]; |
| + dst2[i * 2 + 1] = src2[i + 4]; |
| + } |
| + set_d_register(Vd, dst1); |
| + set_d_register(Vm, dst2); |
| + break; |
| } |
| - set_q_register(Vd, dst1); |
| - set_q_register(Vm, dst2); |
| - break; |
| - } |
| - case Neon16: { |
| - uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
| - get_q_register(Vd, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - dst1[i * 2] = src1[i]; |
| - dst1[i * 2 + 1] = src2[i]; |
| - dst2[i * 2] = src1[i + 4]; |
| - dst2[i * 2 + 1] = src2[i + 4]; |
| + case Neon16: { |
| + uint16_t src1[4], src2[4], dst1[4], dst2[4]; |
| + get_d_register(Vd, src1); |
| + get_d_register(Vm, src2); |
| + for (int i = 0; i < 2; i++) { |
| + dst1[i * 2] = src1[i]; |
| + dst1[i * 2 + 1] = src2[i]; |
| + dst2[i * 2] = src1[i + 2]; |
| + dst2[i * 2 + 1] = src2[i + 2]; |
| + } |
| + set_d_register(Vd, dst1); |
| + set_d_register(Vm, dst2); |
| + break; |
| } |
| - set_q_register(Vd, dst1); |
| - set_q_register(Vm, dst2); |
| - break; |
| + case Neon32: { |
| + uint32_t src1[2], src2[2], dst1[2], dst2[2]; |
| + get_d_register(Vd, src1); |
| + get_d_register(Vm, src2); |
| + dst1[0] = src1[0]; |
| + dst1[1] = src2[0]; |
| + dst2[0] = src1[1]; |
| + dst2[1] = src2[1]; |
| + set_d_register(Vd, dst1); |
| + set_d_register(Vm, dst2); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - case Neon32: { |
| - uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
| - get_q_register(Vd, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 2; i++) { |
| - dst1[i * 2] = src1[i]; |
| - dst1[i * 2 + 1] = src2[i]; |
| - dst2[i * 2] = src1[i + 2]; |
| - dst2[i * 2 + 1] = src2[i + 2]; |
| + } else { |
| + // vuzp.<size> Dd, Dm. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[8], src2[8], dst1[8], dst2[8]; |
| + get_d_register(Vd, src1); |
| + get_d_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + dst1[i] = src1[i * 2]; |
| + dst1[i + 4] = src2[i * 2]; |
| + dst2[i] = src1[i * 2 + 1]; |
| + dst2[i + 4] = src2[i * 2 + 1]; |
| + } |
| + set_d_register(Vd, dst1); |
| + set_d_register(Vm, dst2); |
| + break; |
| } |
| - set_q_register(Vd, dst1); |
| - set_q_register(Vm, dst2); |
| - break; |
| + case Neon16: { |
| + uint16_t src1[4], src2[4], dst1[4], dst2[4]; |
| + get_d_register(Vd, src1); |
| + get_d_register(Vm, src2); |
| + for (int i = 0; i < 2; i++) { |
| + dst1[i] = src1[i * 2]; |
| + dst1[i + 2] = src2[i * 2]; |
| + dst2[i] = src1[i * 2 + 1]; |
| + dst2[i + 2] = src2[i * 2 + 1]; |
| + } |
| + set_d_register(Vd, dst1); |
| + set_d_register(Vm, dst2); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[2], src2[2], dst1[2], dst2[2]; |
| + get_d_register(Vd, src1); |
| + get_d_register(Vm, src2); |
| + dst1[0] = src1[0]; |
| + dst1[1] = src2[0]; |
| + dst2[0] = src1[1]; |
| + dst2[1] = src2[1]; |
| + set_d_register(Vd, dst1); |
| + set_d_register(Vm, dst2); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| } |
| } else { |
| - // vuzp.<size> Qd, Qm. |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
| - get_q_register(Vd, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - dst1[i] = src1[i * 2]; |
| - dst1[i + 8] = src2[i * 2]; |
| - dst2[i] = src1[i * 2 + 1]; |
| - dst2[i + 8] = src2[i * 2 + 1]; |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + if (instr->Bit(7) == 1) { |
| + // vzip.<size> Qd, Qm. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
| + get_q_register(Vd, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + dst1[i * 2] = src1[i]; |
| + dst1[i * 2 + 1] = src2[i]; |
| + dst2[i * 2] = src1[i + 8]; |
| + dst2[i * 2 + 1] = src2[i + 8]; |
| + } |
| + set_q_register(Vd, dst1); |
| + set_q_register(Vm, dst2); |
| + break; |
| } |
| - set_q_register(Vd, dst1); |
| - set_q_register(Vm, dst2); |
| - break; |
| - } |
| - case Neon16: { |
| - uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
| - get_q_register(Vd, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - dst1[i] = src1[i * 2]; |
| - dst1[i + 4] = src2[i * 2]; |
| - dst2[i] = src1[i * 2 + 1]; |
| - dst2[i + 4] = src2[i * 2 + 1]; |
| + case Neon16: { |
| + uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
| + get_q_register(Vd, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + dst1[i * 2] = src1[i]; |
| + dst1[i * 2 + 1] = src2[i]; |
| + dst2[i * 2] = src1[i + 4]; |
| + dst2[i * 2 + 1] = src2[i + 4]; |
| + } |
| + set_q_register(Vd, dst1); |
| + set_q_register(Vm, dst2); |
| + break; |
| } |
| - set_q_register(Vd, dst1); |
| - set_q_register(Vm, dst2); |
| - break; |
| + case Neon32: { |
| + uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
| + get_q_register(Vd, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 2; i++) { |
| + dst1[i * 2] = src1[i]; |
| + dst1[i * 2 + 1] = src2[i]; |
| + dst2[i * 2] = src1[i + 2]; |
| + dst2[i * 2 + 1] = src2[i + 2]; |
| + } |
| + set_q_register(Vd, dst1); |
| + set_q_register(Vm, dst2); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - case Neon32: { |
| - uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
| - get_q_register(Vd, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 2; i++) { |
| - dst1[i] = src1[i * 2]; |
| - dst1[i + 2] = src2[i * 2]; |
| - dst2[i] = src1[i * 2 + 1]; |
| - dst2[i + 2] = src2[i * 2 + 1]; |
| + } else { |
| + // vuzp.<size> Qd, Qm. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
| + get_q_register(Vd, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + dst1[i] = src1[i * 2]; |
| + dst1[i + 8] = src2[i * 2]; |
| + dst2[i] = src1[i * 2 + 1]; |
| + dst2[i + 8] = src2[i * 2 + 1]; |
| + } |
| + set_q_register(Vd, dst1); |
| + set_q_register(Vm, dst2); |
| + break; |
| } |
| - set_q_register(Vd, dst1); |
| - set_q_register(Vm, dst2); |
| - break; |
| + case Neon16: { |
| + uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
| + get_q_register(Vd, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + dst1[i] = src1[i * 2]; |
| + dst1[i + 4] = src2[i * 2]; |
| + dst2[i] = src1[i * 2 + 1]; |
| + dst2[i + 4] = src2[i * 2 + 1]; |
| + } |
| + set_q_register(Vd, dst1); |
| + set_q_register(Vm, dst2); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
| + get_q_register(Vd, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 2; i++) { |
| + dst1[i] = src1[i * 2]; |
| + dst1[i + 2] = src2[i * 2]; |
| + dst2[i] = src1[i * 2 + 1]; |
| + dst2[i + 2] = src2[i * 2 + 1]; |
| + } |
| + set_q_register(Vd, dst1); |
| + set_q_register(Vm, dst2); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| } |
| } |
| } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
| @@ -5271,48 +5370,90 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| UNREACHABLE(); |
| break; |
| } |
| - } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) { |
| NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| - // vtrn.<size> Qd, Qm. |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src[16], dst[16]; |
| - get_q_register(Vd, dst); |
| - get_q_register(Vm, src); |
| - for (int i = 0; i < 8; i++) { |
| - std::swap(dst[2 * i + 1], src[2 * i]); |
| + if (instr->Bit(6) == 0) { |
| + int Vd = instr->VFPDRegValue(kDoublePrecision); |
| + int Vm = instr->VFPMRegValue(kDoublePrecision); |
| + // vtrn.<size> Dd, Dm. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src[8], dst[8]; |
| + get_d_register(Vd, dst); |
| + get_d_register(Vm, src); |
| + for (int i = 0; i < 4; i++) { |
| + std::swap(dst[2 * i + 1], src[2 * i]); |
| + } |
| + set_d_register(Vd, dst); |
| + set_d_register(Vm, src); |
| + break; |
| } |
| - set_q_register(Vd, dst); |
| - set_q_register(Vm, src); |
| - break; |
| - } |
| - case Neon16: { |
| - uint16_t src[8], dst[8]; |
| - get_q_register(Vd, dst); |
| - get_q_register(Vm, src); |
| - for (int i = 0; i < 4; i++) { |
| - std::swap(dst[2 * i + 1], src[2 * i]); |
| + case Neon16: { |
| + uint16_t src[4], dst[4]; |
| + get_d_register(Vd, dst); |
| + get_d_register(Vm, src); |
| + for (int i = 0; i < 2; i++) { |
| + std::swap(dst[2 * i + 1], src[2 * i]); |
| + } |
| + set_d_register(Vd, dst); |
| + set_d_register(Vm, src); |
| + break; |
| } |
| - set_q_register(Vd, dst); |
| - set_q_register(Vm, src); |
| - break; |
| + case Neon32: { |
| + uint32_t src[2], dst[2]; |
| + get_d_register(Vd, dst); |
| + get_d_register(Vm, src); |
| + std::swap(dst[1], src[0]); |
| + set_d_register(Vd, dst); |
| + set_d_register(Vm, src); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - case Neon32: { |
| - uint32_t src[4], dst[4]; |
| - get_q_register(Vd, dst); |
| - get_q_register(Vm, src); |
| - for (int i = 0; i < 2; i++) { |
| - std::swap(dst[2 * i + 1], src[2 * i]); |
| + } else { |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + // vtrn.<size> Qd, Qm. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src[16], dst[16]; |
| + get_q_register(Vd, dst); |
| + get_q_register(Vm, src); |
| + for (int i = 0; i < 8; i++) { |
| + std::swap(dst[2 * i + 1], src[2 * i]); |
| + } |
| + set_q_register(Vd, dst); |
| + set_q_register(Vm, src); |
| + break; |
| } |
| - set_q_register(Vd, dst); |
| - set_q_register(Vm, src); |
| - break; |
| + case Neon16: { |
| + uint16_t src[8], dst[8]; |
| + get_q_register(Vd, dst); |
| + get_q_register(Vm, src); |
| + for (int i = 0; i < 4; i++) { |
| + std::swap(dst[2 * i + 1], src[2 * i]); |
| + } |
| + set_q_register(Vd, dst); |
| + set_q_register(Vm, src); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src[4], dst[4]; |
| + get_q_register(Vd, dst); |
| + get_q_register(Vm, src); |
| + for (int i = 0; i < 2; i++) { |
| + std::swap(dst[2 * i + 1], src[2 * i]); |
| + } |
| + set_q_register(Vd, dst); |
| + set_q_register(Vm, src); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| } |
| } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
| int Vd = instr->VFPDRegValue(kSimd128Precision); |