Chromium Code Reviews
| Index: src/arm/simulator-arm.cc |
| diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
| index ad7c4daae19d9323935de5f2adeca98dcea13292..96d08ab9d59da8509eb708c195158d76f27b9550 100644 |
| --- a/src/arm/simulator-arm.cc |
| +++ b/src/arm/simulator-arm.cc |
| @@ -3990,59 +3990,194 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { |
| void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| switch (instr->SpecialValue()) { |
| - case 4: |
| - if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 2 && |
| - instr->Bit(4) == 1) { |
| - // vmov Qd, Qm. |
| - // vorr, Qd, Qm, Qn. |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - uint32_t src1[4]; |
| - get_q_register(Vm, src1); |
| - if (Vm != Vn) { |
| - uint32_t src2[4]; |
| - get_q_register(Vn, src2); |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = src1[i] | src2[i]; |
| + case 4: { |
| + int Vd, Vm, Vn; |
| + if (instr->Bit(6) == 0) { |
| + Vd = instr->VFPDRegValue(kDoublePrecision); |
| + Vm = instr->VFPMRegValue(kDoublePrecision); |
| + Vn = instr->VFPNRegValue(kDoublePrecision); |
| + } else { |
| + Vd = instr->VFPDRegValue(kSimd128Precision); |
| + Vm = instr->VFPMRegValue(kSimd128Precision); |
| + Vn = instr->VFPNRegValue(kSimd128Precision); |
| + } |
| + switch (instr->Bits(11, 8)) { |
| + case 0x0: { |
| + if (instr->Bit(4) == 1) { |
| + // vqadd.s<size> Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + int8_t src1[16], src2[16]; |
|
martyn.capewell (2017/01/30 13:50:47):
It might be possible to factor out simulator operations
bbudge (2017/01/31 19:58:24):
Done for vqadd/vqsub, and added a TODO to do this
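
For illustration only (not code from this CL): a minimal sketch of the kind of templated saturating helper the comment suggests factoring out, assuming 8/16/32-bit lane types. The names SaturateAdd/SaturateSub are hypothetical, not the helpers that actually landed.

#include <algorithm>
#include <cstdint>
#include <limits>

// Widen to 64 bits, clamp to T's representable range, then narrow back.
// Works for 8/16/32-bit signed and unsigned lane types; 64-bit lanes would
// need a wider accumulator.
template <typename T>
T SaturateAdd(T a, T b) {
  int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
  sum = std::max<int64_t>(std::numeric_limits<T>::min(),
                          std::min<int64_t>(std::numeric_limits<T>::max(), sum));
  return static_cast<T>(sum);
}

template <typename T>
T SaturateSub(T a, T b) {
  int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
  diff = std::max<int64_t>(std::numeric_limits<T>::min(),
                           std::min<int64_t>(std::numeric_limits<T>::max(), diff));
  return static_cast<T>(diff);
}

// A Neon8 lane loop like the ones in this patch would then reduce to:
//   for (int i = 0; i < 16; i++) src1[i] = SaturateAdd<int8_t>(src1[i], src2[i]);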
|
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + int16_t s1 = static_cast<int16_t>(src1[i]); |
| + int16_t s2 = static_cast<int16_t>(src2[i]); |
| + int16_t sum = std::max(-0x80, std::min(0x7f, s1 + s2)); |
| + src1[i] = static_cast<int8_t>(sum); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + int16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + int32_t s1 = static_cast<int32_t>(src1[i]); |
| + int32_t s2 = static_cast<int32_t>(src2[i]); |
| + int32_t sum = std::max(-0x8000, std::min(0x7fff, s1 + s2)); |
| + src1[i] = static_cast<int16_t>(sum); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + int32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + int64_t s1 = static_cast<int64_t>(src1[i]); |
| + int64_t s2 = static_cast<int64_t>(src2[i]); |
| + int64_t sum = |
| + std::max(-0x80000000LL, std::min(0x7fffffffLL, s1 + s2)); |
| + src1[i] = static_cast<int32_t>(sum); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - } else if (instr->Bits(11, 8) == 8) { |
| - // vadd/vtst |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - if (instr->Bit(4) == 0) { |
| - // vadd.i<size> Qd, Qm, Qn. |
| + case 0x1: { |
| + if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && |
| + instr->Bit(4) == 1) { |
| + // vmov Qd, Qm. |
| + // vorr Qd, Qm, Qn. |
| + uint32_t src1[4]; |
| + get_q_register(Vm, src1); |
| + if (Vm != Vn) { |
| + uint32_t src2[4]; |
| + get_q_register(Vn, src2); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = src1[i] | src2[i]; |
| + } |
| + } |
| + set_q_register(Vd, src1); |
| + } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && |
| + instr->Bit(4) == 1) { |
| + // vand Qd, Qm, Qn. |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = src1[i] & src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + } else { |
| + UNIMPLEMENTED(); |
| + } |
| + break; |
| + } |
| + case 0x2: { |
| + if (instr->Bit(4) == 1) { |
| + // vqsub.s<size> Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + int8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + int16_t diff = |
| + std::max(-0x80, std::min(0x7f, src1[i] - src2[i])); |
| + src1[i] = static_cast<int8_t>(diff); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + int16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + int32_t diff = |
| + std::max(-0x8000, std::min(0x7fff, src1[i] - src2[i])); |
| + src1[i] = static_cast<int16_t>(diff); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + int32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + int64_t s1 = src1[i]; |
| + int64_t s2 = src2[i]; |
| + int64_t sum = |
| + std::max(-0x80000000LL, std::min(0x7fffffffLL, s1 - s2)); |
| + src1[i] = static_cast<int32_t>(sum); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + } else { |
| + UNIMPLEMENTED(); |
| + } |
| + break; |
| + } |
| + case 0x3: { |
| + // vcge/vcgt.s<size> Qd, Qm, Qn. |
| + bool ge = instr->Bit(4) == 1; |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| switch (size) { |
| case Neon8: { |
| - uint8_t src1[16], src2[16]; |
| + int8_t src1[16], src2[16]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 16; i++) { |
| - src1[i] += src2[i]; |
| + if (ge) |
| + src1[i] = src1[i] >= src2[i] ? 0xFF : 0; |
| + else |
| + src1[i] = src1[i] > src2[i] ? 0xFF : 0; |
| } |
| set_q_register(Vd, src1); |
| break; |
| } |
| case Neon16: { |
| - uint16_t src1[8], src2[8]; |
| + int16_t src1[8], src2[8]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 8; i++) { |
| - src1[i] += src2[i]; |
| + if (ge) |
| + src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; |
| + else |
| + src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; |
| } |
| set_q_register(Vd, src1); |
| break; |
| } |
| case Neon32: { |
| - uint32_t src1[4], src2[4]; |
| + int32_t src1[4], src2[4]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 4; i++) { |
| - src1[i] += src2[i]; |
| + if (ge) |
| + src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; |
| + else |
| + src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; |
| } |
| set_q_register(Vd, src1); |
| break; |
| @@ -4051,35 +4186,48 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| UNREACHABLE(); |
| break; |
| } |
| - } else { |
| - // vtst.i<size> Qd, Qm, Qn. |
| + break; |
| + } |
| + case 0x6: { |
| + // vmin/vmax.s<size> Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + bool min = instr->Bit(4) != 0; |
| switch (size) { |
| case Neon8: { |
| - uint8_t src1[16], src2[16]; |
| + int8_t src1[16], src2[16]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 16; i++) { |
| - src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; |
| + if (min) |
| + src1[i] = std::min(src1[i], src2[i]); |
| + else |
| + src1[i] = std::max(src1[i], src2[i]); |
| } |
| set_q_register(Vd, src1); |
| break; |
| } |
| case Neon16: { |
| - uint16_t src1[8], src2[8]; |
| + int16_t src1[8], src2[8]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 8; i++) { |
| - src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; |
| + if (min) |
| + src1[i] = std::min(src1[i], src2[i]); |
| + else |
| + src1[i] = std::max(src1[i], src2[i]); |
| } |
| set_q_register(Vd, src1); |
| break; |
| } |
| case Neon32: { |
| - uint32_t src1[4], src2[4]; |
| + int32_t src1[4], src2[4]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 4; i++) { |
| - src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
| + if (min) |
| + src1[i] = std::min(src1[i], src2[i]); |
| + else |
| + src1[i] = std::max(src1[i], src2[i]); |
| } |
| set_q_register(Vd, src1); |
| break; |
| @@ -4088,234 +4236,210 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| UNREACHABLE(); |
| break; |
| } |
| + break; |
| } |
| - } else if (instr->Bits(11, 8) == 0xd && instr->Bit(20) == 0 && |
| - instr->Bit(4) == 0) { |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - float src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - if (instr->Bit(21) == 0) { |
| - // vadd.f32 Qd, Qm, Qn. |
| - src1[i] = src1[i] + src2[i]; |
| + case 0x8: { |
| + // vadd/vtst |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + if (instr->Bit(4) == 0) { |
| + // vadd.i<size> Qd, Qm, Qn. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + src1[i] += src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + src1[i] += src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] += src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| } else { |
| - // vsub.f32 Qd, Qm, Qn. |
| - src1[i] = src1[i] - src2[i]; |
| - } |
| - } |
| - set_q_register(Vd, src1); |
| - } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 && |
| - instr->Bit(4) == 1) { |
| - // vmul.i<size> Qd, Qm, Qn. |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src1[16], src2[16]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 16; i++) { |
| - src1[i] *= src2[i]; |
| + // vtst.i<size> Qd, Qm, Qn. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| } |
| - case Neon16: { |
| - uint16_t src1[8], src2[8]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - src1[i] *= src2[i]; |
| + break; |
| + } |
| + case 0x9: { |
| + if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
| + // vmul.i<size> Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + src1[i] *= src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + src1[i] *= src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] *= src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| - case Neon32: { |
| - uint32_t src1[4], src2[4]; |
| + break; |
| + } |
| + case 0xd: { |
| + if (instr->Bit(4) == 0) { |
| + float src1[4], src2[4]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 4; i++) { |
| - src1[i] *= src2[i]; |
| + if (instr->Bit(21) == 0) { |
| + // vadd.f32 Qd, Qm, Qn. |
| + src1[i] = src1[i] + src2[i]; |
| + } else { |
| + // vsub.f32 Qd, Qm, Qn. |
| + src1[i] = src1[i] - src2[i]; |
| + } |
| } |
| set_q_register(Vd, src1); |
| - break; |
| - } |
| - default: |
| + } else { |
| UNIMPLEMENTED(); |
| - break; |
| - } |
| - } else if (instr->Bits(11, 8) == 0xe && instr->Bits(21, 20) == 0 && |
| - instr->Bit(4) == 0) { |
| - // vceq.f32. |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - float src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - uint32_t dst[4]; |
| - for (int i = 0; i < 4; i++) { |
| - dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
| - } |
| - set_q_register(Vd, dst); |
| - } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 && |
| - instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - // vand Qd, Qm, Qn. |
| - uint32_t src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = src1[i] & src2[i]; |
| - } |
| - set_q_register(Vd, src1); |
| - } else if (instr->Bits(11, 8) == 0x3) { |
| - // vcge/vcgt.s<size> Qd, Qm, Qn. |
| - bool ge = instr->Bit(4) == 1; |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - switch (size) { |
| - case Neon8: { |
| - int8_t src1[16], src2[16]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 16; i++) { |
| - if (ge) |
| - src1[i] = src1[i] >= src2[i] ? 0xFF : 0; |
| - else |
| - src1[i] = src1[i] > src2[i] ? 0xFF : 0; |
| - } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - case Neon16: { |
| - int16_t src1[8], src2[8]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - if (ge) |
| - src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; |
| - else |
| - src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; |
| - } |
| - set_q_register(Vd, src1); |
| - break; |
| } |
| - case Neon32: { |
| - int32_t src1[4], src2[4]; |
| + break; |
| + } |
| + case 0xe: { |
| + if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { |
| + // vceq.f32. |
| + float src1[4], src2[4]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| + uint32_t dst[4]; |
| for (int i = 0; i < 4; i++) { |
| - if (ge) |
| - src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; |
| - else |
| - src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; |
| - } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| - } |
| - } else if (instr->Bits(11, 8) == 0xf && instr->Bit(20) == 0 && |
| - instr->Bit(6) == 1) { |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - float src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - if (instr->Bit(4) == 1) { |
| - if (instr->Bit(21) == 0) { |
| - // vrecps.f32 Qd, Qm, Qn. |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = 2.0f - src1[i] * src2[i]; |
| - } |
| - } else { |
| - // vrsqrts.f32 Qd, Qm, Qn. |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; |
| - } |
| - } |
| - } else { |
| - if (instr->Bit(21) == 1) { |
| - // vmin.f32 Qd, Qm, Qn. |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = std::min(src1[i], src2[i]); |
| + dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
| } |
| + set_q_register(Vd, dst); |
| } else { |
| - // vmax.f32 Qd, Qm, Qn. |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = std::max(src1[i], src2[i]); |
| - } |
| + UNIMPLEMENTED(); |
| } |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - } else if (instr->Bits(11, 8) == 0x6) { |
| - // vmin/vmax.s<size> Qd, Qm, Qn. |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - bool min = instr->Bit(4) != 0; |
| - switch (size) { |
| - case Neon8: { |
| - int8_t src1[16], src2[16]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 16; i++) { |
| - if (min) |
| - src1[i] = std::min(src1[i], src2[i]); |
| - else |
| - src1[i] = std::max(src1[i], src2[i]); |
| - } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - case Neon16: { |
| - int16_t src1[8], src2[8]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - if (min) |
| - src1[i] = std::min(src1[i], src2[i]); |
| - else |
| - src1[i] = std::max(src1[i], src2[i]); |
| - } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - case Neon32: { |
| - int32_t src1[4], src2[4]; |
| + case 0xf: { |
| + if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { |
| + float src1[4], src2[4]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - if (min) |
| - src1[i] = std::min(src1[i], src2[i]); |
| - else |
| - src1[i] = std::max(src1[i], src2[i]); |
| + if (instr->Bit(4) == 1) { |
| + if (instr->Bit(21) == 0) { |
| + // vrecps.f32 Qd, Qm, Qn. |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = 2.0f - src1[i] * src2[i]; |
| + } |
| + } else { |
| + // vrsqrts.f32 Qd, Qm, Qn. |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; |
| + } |
| + } |
| + } else { |
| + if (instr->Bit(21) == 1) { |
| + // vmin.f32 Qd, Qm, Qn. |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = std::min(src1[i], src2[i]); |
| + } |
| + } else { |
| + // vmax.f32 Qd, Qm, Qn. |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = std::max(src1[i], src2[i]); |
| + } |
| + } |
| } |
| set_q_register(Vd, src1); |
| - break; |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| + break; |
| } |
| - } else { |
| - UNIMPLEMENTED(); |
| + default: |
| + UNIMPLEMENTED(); |
| + break; |
| } |
| break; |
| + } |
| case 5: |
| if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && |
| (instr->Bit(4) == 1)) { |
| @@ -4436,265 +4560,372 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| UNIMPLEMENTED(); |
| } |
| break; |
| - case 6: |
| - if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { |
| - // vsub.size Qd, Qm, Qn. |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src1[16], src2[16]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 16; i++) { |
| - src1[i] -= src2[i]; |
| - } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - case Neon16: { |
| - uint16_t src1[8], src2[8]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - src1[i] -= src2[i]; |
| + case 6: { |
| + int Vd, Vm, Vn; |
| + if (instr->Bit(6) == 0) { |
| + Vd = instr->VFPDRegValue(kDoublePrecision); |
| + Vm = instr->VFPMRegValue(kDoublePrecision); |
| + Vn = instr->VFPNRegValue(kDoublePrecision); |
| + } else { |
| + Vd = instr->VFPDRegValue(kSimd128Precision); |
| + Vm = instr->VFPMRegValue(kSimd128Precision); |
| + Vn = instr->VFPNRegValue(kSimd128Precision); |
| + } |
| + switch (instr->Bits(11, 8)) { |
| + case 0x0: { |
| + if (instr->Bit(4) == 1) { |
| + // vqadd.u<size> Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + uint16_t sum = std::min(0xff, src1[i] + src2[i]); |
| + src1[i] = static_cast<uint8_t>(sum); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + uint32_t sum = std::min(0xffff, src1[i] + src2[i]); |
| + src1[i] = static_cast<uint16_t>(sum); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + uint64_t s1 = src1[i]; |
| + uint64_t s2 = src2[i]; |
| + uint64_t sum = std::min<uint64_t>(0xffffffffu, s1 + s2); |
| + src1[i] = static_cast<uint32_t>(sum); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| - case Neon32: { |
| - uint32_t src1[4], src2[4]; |
| + break; |
| + } |
| + case 0x1: { |
| + if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { |
| + // vbsl.size Qd, Qm, Qn. |
| + uint32_t dst[4], src1[4], src2[4]; |
| + get_q_register(Vd, dst); |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| for (int i = 0; i < 4; i++) { |
| - src1[i] -= src2[i]; |
| + dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| + set_q_register(Vd, dst); |
| + } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { |
| + if (instr->Bit(6) == 0) { |
| + // veor Dd, Dn, Dm |
| + uint64_t src1, src2; |
| + get_d_register(Vn, &src1); |
| + get_d_register(Vm, &src2); |
| + src1 ^= src2; |
| + set_d_register(Vd, &src1); |
| + |
| + } else { |
| + // veor Qd, Qn, Qm |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
| + set_q_register(Vd, src1); |
| + } |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| + break; |
| } |
| - } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) { |
| - // vceq.size Qd, Qm, Qn. |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src1[16], src2[16]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 16; i++) { |
| - src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; |
| + case 0x2: { |
| + if (instr->Bit(4) == 1) { |
| + // vqsub.u<size> Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + int diff = src1[i] - src2[i]; |
| + diff = std::max(0, std::min(0xff, diff)); |
| + src1[i] = static_cast<uint8_t>(diff); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + int diff = src1[i] - src2[i]; |
| + diff = std::max(0, std::min(0xffff, diff)); |
| + src1[i] = static_cast<uint16_t>(diff); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + int64_t s1 = src1[i]; |
| + int64_t s2 = src2[i]; |
| + int64_t diff = |
| + std::max<int64_t>(0, std::min<int64_t>(0xffffffff, s1 - s2)); |
| + src1[i] = static_cast<uint32_t>(diff); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| - case Neon16: { |
| - uint16_t src1[8], src2[8]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; |
| + break; |
| + } |
| + case 0x3: { |
| + // vcge/vcgt.u<size> Qd, Qm, Qn. |
| + bool ge = instr->Bit(4) == 1; |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + if (ge) |
| + src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; |
| + else |
| + src1[i] = src1[i] > src2[i] ? 0xFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - case Neon32: { |
| - uint32_t src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + if (ge) |
| + src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; |
| + else |
| + src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| - } |
| - } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 1 && |
| - instr->Bit(4) == 1) { |
| - // vbsl.size Qd, Qm, Qn. |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - uint32_t dst[4], src1[4], src2[4]; |
| - get_q_register(Vd, dst); |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
| - } |
| - set_q_register(Vd, dst); |
| - } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 && |
| - instr->Bit(4) == 1) { |
| - if (instr->Bit(6) == 0) { |
| - // veor Dd, Dn, Dm |
| - int Vd = instr->VFPDRegValue(kDoublePrecision); |
| - int Vn = instr->VFPNRegValue(kDoublePrecision); |
| - int Vm = instr->VFPMRegValue(kDoublePrecision); |
| - uint64_t src1, src2; |
| - get_d_register(Vn, &src1); |
| - get_d_register(Vm, &src2); |
| - src1 ^= src2; |
| - set_d_register(Vd, &src1); |
| - |
| - } else { |
| - // veor Qd, Qn, Qm |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - uint32_t src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
| - set_q_register(Vd, src1); |
| - } |
| - } else if (instr->Bits(11, 8) == 0xd && instr->Bit(21) == 0 && |
| - instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
| - // vmul.f32 Qd, Qn, Qm |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - float src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - src1[i] = src1[i] * src2[i]; |
| - } |
| - set_q_register(Vd, src1); |
| - } else if (instr->Bits(11, 8) == 0xe && instr->Bit(20) == 0 && |
| - instr->Bit(4) == 0) { |
| - // vcge/vcgt.f32 Qd, Qm, Qn |
| - bool ge = instr->Bit(21) == 0; |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - float src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - uint32_t dst[4]; |
| - for (int i = 0; i < 4; i++) { |
| - if (ge) { |
| - dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
| - } else { |
| - dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + if (ge) |
| + src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
| + else |
| + src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| + break; |
| } |
| - set_q_register(Vd, dst); |
| - } else if (instr->Bits(11, 8) == 0x3) { |
| - // vcge/vcgt.u<size> Qd, Qm, Qn. |
| - bool ge = instr->Bit(4) == 1; |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src1[16], src2[16]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 16; i++) { |
| - if (ge) |
| - src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; |
| - else |
| - src1[i] = src1[i] > src2[i] ? 0xFFu : 0; |
| + case 0x6: { |
| + // vmin/vmax.u<size> Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + bool min = instr->Bit(4) != 0; |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + if (min) |
| + src1[i] = std::min(src1[i], src2[i]); |
| + else |
| + src1[i] = std::max(src1[i], src2[i]); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - case Neon16: { |
| - uint16_t src1[8], src2[8]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - if (ge) |
| - src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; |
| - else |
| - src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + if (min) |
| + src1[i] = std::min(src1[i], src2[i]); |
| + else |
| + src1[i] = std::max(src1[i], src2[i]); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| - } |
| - case Neon32: { |
| - uint32_t src1[4], src2[4]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 4; i++) { |
| - if (ge) |
| - src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
| - else |
| - src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + if (min) |
| + src1[i] = std::min(src1[i], src2[i]); |
| + else |
| + src1[i] = std::max(src1[i], src2[i]); |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| + break; |
| } |
| - } else if (instr->Bits(11, 8) == 0x6) { |
| - // vmin/vmax.u<size> Qd, Qm, Qn. |
| - NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| - int Vd = instr->VFPDRegValue(kSimd128Precision); |
| - int Vm = instr->VFPMRegValue(kSimd128Precision); |
| - int Vn = instr->VFPNRegValue(kSimd128Precision); |
| - bool min = instr->Bit(4) != 0; |
| - switch (size) { |
| - case Neon8: { |
| - uint8_t src1[16], src2[16]; |
| - get_q_register(Vn, src1); |
| - get_q_register(Vm, src2); |
| - for (int i = 0; i < 16; i++) { |
| - if (min) |
| - src1[i] = std::min(src1[i], src2[i]); |
| - else |
| - src1[i] = std::max(src1[i], src2[i]); |
| + case 0x8: { |
| + if (instr->Bit(4) == 0) { |
| + // vsub.size Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + src1[i] -= src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + src1[i] -= src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] -= src2[i]; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + } else { |
| + // vceq.size Qd, Qm, Qn. |
| + NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t src1[16], src2[16]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 16; i++) { |
| + src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon16: { |
| + uint16_t src1[8], src2[8]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 8; i++) { |
| + src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + case Neon32: { |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; |
| + } |
| + set_q_register(Vd, src1); |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| } |
| - case Neon16: { |
| - uint16_t src1[8], src2[8]; |
| + break; |
| + } |
| + case 0xd: { |
| + if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
| + // vmul.f32 Qd, Qn, Qm |
| + float src1[4], src2[4]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| - for (int i = 0; i < 8; i++) { |
| - if (min) |
| - src1[i] = std::min(src1[i], src2[i]); |
| - else |
| - src1[i] = std::max(src1[i], src2[i]); |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = src1[i] * src2[i]; |
| } |
| set_q_register(Vd, src1); |
| - break; |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| - case Neon32: { |
| - uint32_t src1[4], src2[4]; |
| + break; |
| + } |
| + case 0xe: { |
| + if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { |
| + // vcge/vcgt.f32 Qd, Qm, Qn |
| + bool ge = instr->Bit(21) == 0; |
| + float src1[4], src2[4]; |
| get_q_register(Vn, src1); |
| get_q_register(Vm, src2); |
| + uint32_t dst[4]; |
| for (int i = 0; i < 4; i++) { |
| - if (min) |
| - src1[i] = std::min(src1[i], src2[i]); |
| - else |
| - src1[i] = std::max(src1[i], src2[i]); |
| + if (ge) { |
| + dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
| + } else { |
| + dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
| + } |
| } |
| - set_q_register(Vd, src1); |
| - break; |
| + set_q_register(Vd, dst); |
| + } else { |
| + UNIMPLEMENTED(); |
| } |
| - default: |
| - UNREACHABLE(); |
| - break; |
| + break; |
| } |
| - } else { |
| - UNIMPLEMENTED(); |
| + default: |
| + UNREACHABLE(); |
| + break; |
| } |
| break; |
| + } |
| case 7: |
| if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && |
| (instr->Bit(4) == 1)) { |