| Index: src/arm/simulator-arm.cc
|
| diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
|
| index 2bf2a6a3a52b549a548bc384081e975344b63a69..c1e6fc1f4db743a152e0952b8ae16ea55a98fa41 100644
|
| --- a/src/arm/simulator-arm.cc
|
| +++ b/src/arm/simulator-arm.cc
|
| @@ -896,28 +896,20 @@ void Simulator::set_d_register(int dreg, const uint32_t* value) {
|
| memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
|
| }
|
|
|
| -template <typename T>
|
| -void Simulator::get_d_register(int dreg, T* value) {
|
| - DCHECK((dreg >= 0) && (dreg < num_d_registers));
|
| - memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize);
|
| +template <typename T, int SIZE>
|
| +void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) {
|
| + DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
|
| + DCHECK_LE(0, reg);
|
| + DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
|
| + memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE);
|
| }
|
|
|
| -template <typename T>
|
| -void Simulator::set_d_register(int dreg, const T* value) {
|
| - DCHECK((dreg >= 0) && (dreg < num_d_registers));
|
| - memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize);
|
| -}
|
| -
|
| -template <typename T>
|
| -void Simulator::get_q_register(int qreg, T* value) {
|
| - DCHECK((qreg >= 0) && (qreg < num_q_registers));
|
| - memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size);
|
| -}
|
| -
|
| -template <typename T>
|
| -void Simulator::set_q_register(int qreg, const T* value) {
|
| - DCHECK((qreg >= 0) && (qreg < num_q_registers));
|
| - memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
|
| +template <typename T, int SIZE>
|
| +void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) {
|
| + DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
|
| + DCHECK_LE(0, reg);
|
| + DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
|
| + memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE);
|
| }
|
|
|
| // Raw access to the PC register.
|
| @@ -3508,7 +3500,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
|
| UNREACHABLE();
|
| break;
|
| }
|
| - set_q_register(vd, q_data);
|
| + set_neon_register(vd, q_data);
|
| }
|
| } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
|
| // vmov (scalar to ARM core register)
|
| @@ -3995,7 +3987,6 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
|
| }
|
|
|
| // Templated operations for NEON instructions.
|
| -// TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
|
| template <typename T, typename U>
|
| U Widen(T value) {
|
| static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
|
| @@ -4025,21 +4016,38 @@ T Clamp(int64_t value) {
|
| return static_cast<T>(clamped);
|
| }
|
|
|
| -template <typename T>
|
| -T MinMax(T a, T b, bool is_min) {
|
| - return is_min ? std::min(a, b) : std::max(a, b);
|
| -}
|
| -
|
| template <typename T, typename U>
|
| void Widen(Simulator* simulator, int Vd, int Vm) {
|
| static const int kLanes = 8 / sizeof(T);
|
| T src[kLanes];
|
| U dst[kLanes];
|
| - simulator->get_d_register(Vm, src);
|
| + simulator->get_neon_register<T, kDoubleSize>(Vm, src);
|
| for (int i = 0; i < kLanes; i++) {
|
| dst[i] = Widen<T, U>(src[i]);
|
| }
|
| - simulator->set_q_register(Vd, dst);
|
| + simulator->set_neon_register(Vd, dst);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Abs(Simulator* simulator, int Vd, int Vm) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vm, src);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src[i] = std::abs(src[i]);
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Neg(Simulator* simulator, int Vd, int Vm) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vm, src);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src[i] = -src[i];
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src);
|
| }
|
|
|
| template <typename T, typename U>
|
| @@ -4047,35 +4055,222 @@ void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) {
|
| static const int kLanes = 16 / sizeof(T);
|
| T src[kLanes];
|
| U dst[kLanes];
|
| - simulator->get_q_register(Vm, src);
|
| + simulator->get_neon_register(Vm, src);
|
| for (int i = 0; i < kLanes; i++) {
|
| dst[i] = Narrow<T, U>(Clamp<U>(src[i]));
|
| }
|
| - simulator->set_d_register(Vd, dst);
|
| + simulator->set_neon_register<U, kDoubleSize>(Vd, dst);
|
| }
|
|
|
| template <typename T>
|
| void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
|
| static const int kLanes = 16 / sizeof(T);
|
| T src1[kLanes], src2[kLanes];
|
| - simulator->get_q_register(Vn, src1);
|
| - simulator->get_q_register(Vm, src2);
|
| + simulator->get_neon_register(Vn, src1);
|
| + simulator->get_neon_register(Vm, src2);
|
| for (int i = 0; i < kLanes; i++) {
|
| src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i]));
|
| }
|
| - simulator->set_q_register(Vd, src1);
|
| + simulator->set_neon_register(Vd, src1);
|
| }
|
|
|
| template <typename T>
|
| void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
|
| static const int kLanes = 16 / sizeof(T);
|
| T src1[kLanes], src2[kLanes];
|
| - simulator->get_q_register(Vn, src1);
|
| - simulator->get_q_register(Vm, src2);
|
| + simulator->get_neon_register(Vn, src1);
|
| + simulator->get_neon_register(Vm, src2);
|
| for (int i = 0; i < kLanes; i++) {
|
| src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));
|
| }
|
| - simulator->set_q_register(Vd, src1);
|
| + simulator->set_neon_register(Vd, src1);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Zip(Simulator* simulator, int Vd, int Vm) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + static const int kPairs = kElems / 2;
|
| + T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vd, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kPairs; i++) {
|
| + dst1[i * 2] = src1[i];
|
| + dst1[i * 2 + 1] = src2[i];
|
| + dst2[i * 2] = src1[i + kPairs];
|
| + dst2[i * 2 + 1] = src2[i + kPairs];
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, dst1);
|
| + simulator->set_neon_register<T, SIZE>(Vm, dst2);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Unzip(Simulator* simulator, int Vd, int Vm) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + static const int kPairs = kElems / 2;
|
| + T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vd, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kPairs; i++) {
|
| + dst1[i] = src1[i * 2];
|
| + dst1[i + kPairs] = src2[i * 2];
|
| + dst2[i] = src1[i * 2 + 1];
|
| + dst2[i + kPairs] = src2[i * 2 + 1];
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, dst1);
|
| + simulator->set_neon_register<T, SIZE>(Vm, dst2);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Transpose(Simulator* simulator, int Vd, int Vm) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + static const int kPairs = kElems / 2;
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vd, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kPairs; i++) {
|
| + std::swap(src1[2 * i + 1], src2[2 * i]);
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| + simulator->set_neon_register<T, SIZE>(Vm, src2);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Test(Simulator* simulator, int Vd, int Vm, int Vn) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vn, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0;
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Add(Simulator* simulator, int Vd, int Vm, int Vn) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vn, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src1[i] += src2[i];
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Sub(Simulator* simulator, int Vd, int Vm, int Vn) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vn, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src1[i] -= src2[i];
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void Mul(Simulator* simulator, int Vd, int Vm, int Vn) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vn, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src1[i] *= src2[i];
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vm, src);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src[i] <<= shift;
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vm, src);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src[i] >>= shift;
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vm, src);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src[i] = ArithmeticShiftRight(src[i], shift);
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vn, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src1[i] = src1[i] == src2[i] ? -1 : 0;
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vn, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kElems; i++) {
|
| + if (ge)
|
| + src1[i] = src1[i] >= src2[i] ? -1 : 0;
|
| + else
|
| + src1[i] = src1[i] > src2[i] ? -1 : 0;
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| +}
|
| +
|
| +template <typename T>
|
| +T MinMax(T a, T b, bool is_min) {
|
| + return is_min ? std::min(a, b) : std::max(a, b);
|
| +}
|
| +
|
| +template <typename T, int SIZE>
|
| +void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
|
| + static const int kElems = SIZE / sizeof(T);
|
| + T src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, SIZE>(Vn, src1);
|
| + simulator->get_neon_register<T, SIZE>(Vm, src2);
|
| + for (int i = 0; i < kElems; i++) {
|
| + src1[i] = MinMax(src1[i], src2[i], min);
|
| + }
|
| + simulator->set_neon_register<T, SIZE>(Vd, src1);
|
| +}
|
| +
|
| +template <typename T>
|
| +void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
|
| + static const int kElems = kDoubleSize / sizeof(T);
|
| + static const int kPairs = kElems / 2;
|
| + T dst[kElems], src1[kElems], src2[kElems];
|
| + simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
|
| + simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
|
| + for (int i = 0; i < kPairs; i++) {
|
| + dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
|
| + dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
|
| + }
|
| + simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
|
| }
|
|
|
| void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| @@ -4121,25 +4316,25 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| // vmov Qd, Qm.
|
| // vorr, Qd, Qm, Qn.
|
| uint32_t src1[4];
|
| - get_q_register(Vm, src1);
|
| + get_neon_register(Vm, src1);
|
| if (Vm != Vn) {
|
| uint32_t src2[4];
|
| - get_q_register(Vn, src2);
|
| + get_neon_register(Vn, src2);
|
| for (int i = 0; i < 4; i++) {
|
| src1[i] = src1[i] | src2[i];
|
| }
|
| }
|
| - set_q_register(Vd, src1);
|
| + set_neon_register(Vd, src1);
|
| } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
|
| instr->Bit(4) == 1) {
|
| // vand Qd, Qm, Qn.
|
| uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| for (int i = 0; i < 4; i++) {
|
| src1[i] = src1[i] & src2[i];
|
| }
|
| - set_q_register(Vd, src1);
|
| + set_neon_register(Vd, src1);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -4173,45 +4368,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| bool ge = instr->Bit(4) == 1;
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| switch (size) {
|
| - case Neon8: {
|
| - int8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - if (ge)
|
| - src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
|
| - else
|
| - src1[i] = src1[i] > src2[i] ? 0xFF : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - int16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - if (ge)
|
| - src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
|
| - else
|
| - src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - int32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - if (ge)
|
| - src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
|
| - else
|
| - src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4223,36 +4388,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| bool min = instr->Bit(4) != 0;
|
| switch (size) {
|
| - case Neon8: {
|
| - int8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - src1[i] = MinMax(src1[i], src2[i], min);
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - int16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - src1[i] = MinMax(src1[i], src2[i], min);
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - int32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = MinMax(src1[i], src2[i], min);
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4265,36 +4409,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| if (instr->Bit(4) == 0) {
|
| // vadd.i<size> Qd, Qm, Qn.
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - src1[i] += src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - src1[i] += src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] += src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4302,36 +4425,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| } else {
|
| // vtst.i<size> Qd, Qm, Qn.
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4344,36 +4446,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| // vmul.i<size> Qd, Qm, Qn.
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - src1[i] *= src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - src1[i] *= src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] *= src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4388,37 +4469,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| bool min = instr->Bit(4) != 0;
|
| switch (size) {
|
| - case Neon8: {
|
| - int8_t dst[8], src1[8], src2[8];
|
| - get_d_register(Vn, src1);
|
| - get_d_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
|
| - dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
|
| - }
|
| - set_d_register(Vd, dst);
|
| + case Neon8:
|
| + PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - int16_t dst[4], src1[4], src2[4];
|
| - get_d_register(Vn, src1);
|
| - get_d_register(Vm, src2);
|
| - for (int i = 0; i < 2; i++) {
|
| - dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
|
| - dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
|
| - }
|
| - set_d_register(Vd, dst);
|
| + case Neon16:
|
| + PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - int32_t dst[2], src1[2], src2[2];
|
| - get_d_register(Vn, src1);
|
| - get_d_register(Vm, src2);
|
| - dst[0] = MinMax(src1[0], src1[1], min);
|
| - dst[1] = MinMax(src2[0], src2[1], min);
|
| - set_d_register(Vd, dst);
|
| + case Neon32:
|
| + PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4428,8 +4487,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| case 0xd: {
|
| if (instr->Bit(4) == 0) {
|
| float src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| for (int i = 0; i < 4; i++) {
|
| if (instr->Bit(21) == 0) {
|
| // vadd.f32 Qd, Qm, Qn.
|
| @@ -4439,7 +4498,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| src1[i] = src1[i] - src2[i];
|
| }
|
| }
|
| - set_q_register(Vd, src1);
|
| + set_neon_register(Vd, src1);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -4449,13 +4508,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
|
| // vceq.f32.
|
| float src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| uint32_t dst[4];
|
| for (int i = 0; i < 4; i++) {
|
| dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
|
| }
|
| - set_q_register(Vd, dst);
|
| + set_neon_register(Vd, dst);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -4464,8 +4523,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| case 0xf: {
|
| if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
|
| float src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| if (instr->Bit(4) == 1) {
|
| if (instr->Bit(21) == 0) {
|
| // vrecps.f32 Qd, Qm, Qn.
|
| @@ -4485,7 +4544,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| src1[i] = MinMax(src1[i], src2[i], min);
|
| }
|
| }
|
| - set_q_register(Vd, src1);
|
| + set_neon_register(Vd, src1);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -4526,8 +4585,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| int Vn = instr->VFPNRegValue(kSimd128Precision);
|
| uint8_t src1[16], src2[16], dst[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| int boundary = kSimd128Size - imm4;
|
| int i = 0;
|
| for (; i < boundary; i++) {
|
| @@ -4536,7 +4595,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| for (; i < 16; i++) {
|
| dst[i] = src2[i - boundary];
|
| }
|
| - set_q_register(Vd, dst);
|
| + set_neon_register(Vd, dst);
|
| } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) {
|
| // vshl.i<size> Qd, Qm, shift
|
| int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
|
| @@ -4545,33 +4604,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| NeonSize ns = static_cast<NeonSize>(size / 16);
|
| switch (ns) {
|
| - case Neon8: {
|
| - uint8_t src[16];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 16; i++) {
|
| - src[i] <<= shift;
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon8:
|
| + ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src[8];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 8; i++) {
|
| - src[i] <<= shift;
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon16:
|
| + ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src[4];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 4; i++) {
|
| - src[i] <<= shift;
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon32:
|
| + ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4584,33 +4625,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| NeonSize ns = static_cast<NeonSize>(size / 16);
|
| switch (ns) {
|
| - case Neon8: {
|
| - int8_t src[16];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 16; i++) {
|
| - src[i] = ArithmeticShiftRight(src[i], shift);
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon8:
|
| + ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - int16_t src[8];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 8; i++) {
|
| - src[i] = ArithmeticShiftRight(src[i], shift);
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon16:
|
| + ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - int32_t src[4];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 4; i++) {
|
| - src[i] = ArithmeticShiftRight(src[i], shift);
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon32:
|
| + ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4658,13 +4681,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
|
| // vbsl.size Qd, Qm, Qn.
|
| uint32_t dst[4], src1[4], src2[4];
|
| - get_q_register(Vd, dst);
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vd, dst);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| for (int i = 0; i < 4; i++) {
|
| dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
|
| }
|
| - set_q_register(Vd, dst);
|
| + set_neon_register(Vd, dst);
|
| } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
|
| if (instr->Bit(6) == 0) {
|
| // veor Dd, Dn, Dm
|
| @@ -4677,10 +4700,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| } else {
|
| // veor Qd, Qn, Qm
|
| uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
|
| - set_q_register(Vd, src1);
|
| + set_neon_register(Vd, src1);
|
| }
|
| } else {
|
| UNIMPLEMENTED();
|
| @@ -4715,45 +4738,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| bool ge = instr->Bit(4) == 1;
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - if (ge)
|
| - src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
|
| - else
|
| - src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - if (ge)
|
| - src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
|
| - else
|
| - src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - if (ge)
|
| - src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
|
| - else
|
| - src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4765,36 +4758,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| bool min = instr->Bit(4) != 0;
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - src1[i] = MinMax(src1[i], src2[i], min);
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - src1[i] = MinMax(src1[i], src2[i], min);
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = MinMax(src1[i], src2[i], min);
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4806,36 +4778,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| // vsub.size Qd, Qm, Qn.
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - src1[i] -= src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - src1[i] -= src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] -= src2[i];
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4844,36 +4795,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| // vceq.size Qd, Qm, Qn.
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 16; i++) {
|
| - src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon8:
|
| + CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon16:
|
| + CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
|
| - }
|
| - set_q_register(Vd, src1);
|
| + case Neon32:
|
| + CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4886,37 +4816,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
| bool min = instr->Bit(4) != 0;
|
| switch (size) {
|
| - case Neon8: {
|
| - uint8_t dst[8], src1[8], src2[8];
|
| - get_d_register(Vn, src1);
|
| - get_d_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
|
| - dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
|
| - }
|
| - set_d_register(Vd, dst);
|
| + case Neon8:
|
| + PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t dst[4], src1[4], src2[4];
|
| - get_d_register(Vn, src1);
|
| - get_d_register(Vm, src2);
|
| - for (int i = 0; i < 2; i++) {
|
| - dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
|
| - dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
|
| - }
|
| - set_d_register(Vd, dst);
|
| + case Neon16:
|
| + PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t dst[2], src1[2], src2[2];
|
| - get_d_register(Vn, src1);
|
| - get_d_register(Vm, src2);
|
| - dst[0] = MinMax(src1[0], src1[1], min);
|
| - dst[1] = MinMax(src2[0], src2[1], min);
|
| - set_d_register(Vd, dst);
|
| + case Neon32:
|
| + PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
| @@ -4927,12 +4835,12 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
|
| // vmul.f32 Qd, Qn, Qm
|
| float src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| for (int i = 0; i < 4; i++) {
|
| src1[i] = src1[i] * src2[i];
|
| }
|
| - set_q_register(Vd, src1);
|
| + set_neon_register(Vd, src1);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -4943,8 +4851,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| // vcge/vcgt.f32 Qd, Qm, Qn
|
| bool ge = instr->Bit(21) == 0;
|
| float src1[4], src2[4];
|
| - get_q_register(Vn, src1);
|
| - get_q_register(Vm, src2);
|
| + get_neon_register(Vn, src1);
|
| + get_neon_register(Vm, src2);
|
| uint32_t dst[4];
|
| for (int i = 0; i < 4; i++) {
|
| if (ge) {
|
| @@ -4953,7 +4861,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
|
| }
|
| }
|
| - set_q_register(Vd, dst);
|
| + set_neon_register(Vd, dst);
|
| } else {
|
| UNIMPLEMENTED();
|
| }
|
| @@ -4994,7 +4902,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| uint32_t q_data[4];
|
| - get_q_register(Vm, q_data);
|
| + get_neon_register(Vm, q_data);
|
| int op = instr->Bits(8, 7);
|
| for (int i = 0; i < 4; i++) {
|
| switch (op) {
|
| @@ -5020,7 +4928,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| break;
|
| }
|
| }
|
| - set_q_register(Vd, q_data);
|
| + set_neon_register(Vd, q_data);
|
| } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
|
| if (instr->Bit(6) == 0) {
|
| // vswp Dd, Dm.
|
| @@ -5036,10 +4944,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| uint32_t dval[4], mval[4];
|
| int vd = instr->VFPDRegValue(kSimd128Precision);
|
| int vm = instr->VFPMRegValue(kSimd128Precision);
|
| - get_q_register(vd, dval);
|
| - get_q_register(vm, mval);
|
| - set_q_register(vm, dval);
|
| - set_q_register(vd, mval);
|
| + get_neon_register(vd, dval);
|
| + get_neon_register(vm, mval);
|
| + set_neon_register(vm, dval);
|
| + set_neon_register(vd, mval);
|
| }
|
| } else if (instr->Bits(11, 7) == 0x18) {
|
| // vdup.32 Qd, Sm.
|
| @@ -5049,15 +4957,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| uint32_t s_data = get_s_register(vm * 2 + index);
|
| uint32_t q_data[4];
|
| for (int i = 0; i < 4; i++) q_data[i] = s_data;
|
| - set_q_register(vd, q_data);
|
| + set_neon_register(vd, q_data);
|
| } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
|
| // vmvn Qd, Qm.
|
| int vd = instr->VFPDRegValue(kSimd128Precision);
|
| int vm = instr->VFPMRegValue(kSimd128Precision);
|
| uint32_t q_data[4];
|
| - get_q_register(vm, q_data);
|
| + get_neon_register(vm, q_data);
|
| for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
|
| - set_q_register(vd, q_data);
|
| + set_neon_register(vd, q_data);
|
| } else if (instr->Bits(11, 10) == 0x2) {
|
| // vtb[l,x] Dd, <list>, Dm.
|
| int vd = instr->VFPDRegValue(kDoublePrecision);
|
| @@ -5082,108 +4990,79 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| }
|
| }
|
| set_d_register(vd, &result);
|
| - } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
|
| - instr->Bit(6) == 1) {
|
| + } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
|
| - int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| - int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| - if (instr->Bit(7) == 1) {
|
| - // vzip.<size> Qd, Qm.
|
| - switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16], dst1[16], dst2[16];
|
| - get_q_register(Vd, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - dst1[i * 2] = src1[i];
|
| - dst1[i * 2 + 1] = src2[i];
|
| - dst2[i * 2] = src1[i + 8];
|
| - dst2[i * 2 + 1] = src2[i + 8];
|
| - }
|
| - set_q_register(Vd, dst1);
|
| - set_q_register(Vm, dst2);
|
| - break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8], dst1[8], dst2[8];
|
| - get_q_register(Vd, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - dst1[i * 2] = src1[i];
|
| - dst1[i * 2 + 1] = src2[i];
|
| - dst2[i * 2] = src1[i + 4];
|
| - dst2[i * 2 + 1] = src2[i + 4];
|
| - }
|
| - set_q_register(Vd, dst1);
|
| - set_q_register(Vm, dst2);
|
| - break;
|
| + if (instr->Bit(6) == 0) {
|
| + int Vd = instr->VFPDRegValue(kDoublePrecision);
|
| + int Vm = instr->VFPMRegValue(kDoublePrecision);
|
| + if (instr->Bit(7) == 1) {
|
| + // vzip.<size> Dd, Dm.
|
| + switch (size) {
|
| + case Neon8:
|
| + Zip<uint8_t, kDoubleSize>(this, Vd, Vm);
|
| + break;
|
| + case Neon16:
|
| + Zip<uint16_t, kDoubleSize>(this, Vd, Vm);
|
| + break;
|
| + case Neon32:
|
| + UNIMPLEMENTED();
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4], dst1[4], dst2[4];
|
| - get_q_register(Vd, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 2; i++) {
|
| - dst1[i * 2] = src1[i];
|
| - dst1[i * 2 + 1] = src2[i];
|
| - dst2[i * 2] = src1[i + 2];
|
| - dst2[i * 2 + 1] = src2[i + 2];
|
| - }
|
| - set_q_register(Vd, dst1);
|
| - set_q_register(Vm, dst2);
|
| - break;
|
| + } else {
|
| + // vuzp.<size> Dd, Dm.
|
| + switch (size) {
|
| + case Neon8:
|
| + Unzip<uint8_t, kDoubleSize>(this, Vd, Vm);
|
| + break;
|
| + case Neon16:
|
| + Unzip<uint16_t, kDoubleSize>(this, Vd, Vm);
|
| + break;
|
| + case Neon32:
|
| + UNIMPLEMENTED();
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| }
|
| - default:
|
| - UNREACHABLE();
|
| - break;
|
| }
|
| } else {
|
| - // vuzp.<size> Qd, Qm.
|
| - switch (size) {
|
| - case Neon8: {
|
| - uint8_t src1[16], src2[16], dst1[16], dst2[16];
|
| - get_q_register(Vd, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 8; i++) {
|
| - dst1[i] = src1[i * 2];
|
| - dst1[i + 8] = src2[i * 2];
|
| - dst2[i] = src1[i * 2 + 1];
|
| - dst2[i + 8] = src2[i * 2 + 1];
|
| - }
|
| - set_q_register(Vd, dst1);
|
| - set_q_register(Vm, dst2);
|
| - break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src1[8], src2[8], dst1[8], dst2[8];
|
| - get_q_register(Vd, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 4; i++) {
|
| - dst1[i] = src1[i * 2];
|
| - dst1[i + 4] = src2[i * 2];
|
| - dst2[i] = src1[i * 2 + 1];
|
| - dst2[i + 4] = src2[i * 2 + 1];
|
| - }
|
| - set_q_register(Vd, dst1);
|
| - set_q_register(Vm, dst2);
|
| - break;
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + if (instr->Bit(7) == 1) {
|
| + // vzip.<size> Qd, Qm.
|
| + switch (size) {
|
| + case Neon8:
|
| + Zip<uint8_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + case Neon16:
|
| + Zip<uint16_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + case Neon32:
|
| + Zip<uint32_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| }
|
| - case Neon32: {
|
| - uint32_t src1[4], src2[4], dst1[4], dst2[4];
|
| - get_q_register(Vd, src1);
|
| - get_q_register(Vm, src2);
|
| - for (int i = 0; i < 2; i++) {
|
| - dst1[i] = src1[i * 2];
|
| - dst1[i + 2] = src2[i * 2];
|
| - dst2[i] = src1[i * 2 + 1];
|
| - dst2[i + 2] = src2[i * 2 + 1];
|
| - }
|
| - set_q_register(Vd, dst1);
|
| - set_q_register(Vm, dst2);
|
| - break;
|
| + } else {
|
| + // vuzp.<size> Qd, Qm.
|
| + switch (size) {
|
| + case Neon8:
|
| + Unzip<uint8_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + case Neon16:
|
| + Unzip<uint16_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + case Neon32:
|
| + Unzip<uint32_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| }
|
| - default:
|
| - UNREACHABLE();
|
| - break;
|
| }
|
| }
|
| } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
|
| @@ -5197,32 +5076,32 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| case Neon16: {
|
| DCHECK_EQ(Neon8, size);
|
| uint8_t src[16];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| for (int i = 0; i < 16; i += 2) {
|
| std::swap(src[i], src[i + 1]);
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| break;
|
| }
|
| case Neon32: {
|
| switch (size) {
|
| case Neon16: {
|
| uint16_t src[8];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| for (int i = 0; i < 8; i += 2) {
|
| std::swap(src[i], src[i + 1]);
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| break;
|
| }
|
| case Neon8: {
|
| uint8_t src[16];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| for (int i = 0; i < 4; i++) {
|
| std::swap(src[i * 4], src[i * 4 + 3]);
|
| std::swap(src[i * 4 + 1], src[i * 4 + 2]);
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| break;
|
| }
|
| default:
|
| @@ -5235,30 +5114,30 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| switch (size) {
|
| case Neon32: {
|
| uint32_t src[4];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| std::swap(src[0], src[1]);
|
| std::swap(src[2], src[3]);
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| break;
|
| }
|
| case Neon16: {
|
| uint16_t src[8];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| for (int i = 0; i < 4; i++) {
|
| std::swap(src[i * 4], src[i * 4 + 3]);
|
| std::swap(src[i * 4 + 1], src[i * 4 + 2]);
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| break;
|
| }
|
| case Neon8: {
|
| uint8_t src[16];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| for (int i = 0; i < 4; i++) {
|
| std::swap(src[i], src[7 - i]);
|
| std::swap(src[i + 8], src[15 - i]);
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| break;
|
| }
|
| default:
|
| @@ -5271,48 +5150,44 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| UNREACHABLE();
|
| break;
|
| }
|
| - } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
|
| - int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| - int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
|
| NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
|
| - // vtrn.<size> Qd, Qm.
|
| - switch (size) {
|
| - case Neon8: {
|
| - uint8_t src[16], dst[16];
|
| - get_q_register(Vd, dst);
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 8; i++) {
|
| - std::swap(dst[2 * i + 1], src[2 * i]);
|
| - }
|
| - set_q_register(Vd, dst);
|
| - set_q_register(Vm, src);
|
| - break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src[8], dst[8];
|
| - get_q_register(Vd, dst);
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 4; i++) {
|
| - std::swap(dst[2 * i + 1], src[2 * i]);
|
| - }
|
| - set_q_register(Vd, dst);
|
| - set_q_register(Vm, src);
|
| - break;
|
| + if (instr->Bit(6) == 0) {
|
| + int Vd = instr->VFPDRegValue(kDoublePrecision);
|
| + int Vm = instr->VFPMRegValue(kDoublePrecision);
|
| + // vtrn.<size> Dd, Dm.
|
| + switch (size) {
|
| + case Neon8:
|
| + Transpose<uint8_t, kDoubleSize>(this, Vd, Vm);
|
| + break;
|
| + case Neon16:
|
| + Transpose<uint16_t, kDoubleSize>(this, Vd, Vm);
|
| + break;
|
| + case Neon32:
|
| + Transpose<uint32_t, kDoubleSize>(this, Vd, Vm);
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| }
|
| - case Neon32: {
|
| - uint32_t src[4], dst[4];
|
| - get_q_register(Vd, dst);
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 2; i++) {
|
| - std::swap(dst[2 * i + 1], src[2 * i]);
|
| - }
|
| - set_q_register(Vd, dst);
|
| - set_q_register(Vm, src);
|
| - break;
|
| + } else {
|
| + int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| + int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| + // vtrn.<size> Qd, Qm.
|
| + switch (size) {
|
| + case Neon8:
|
| + Transpose<uint8_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + case Neon16:
|
| + Transpose<uint16_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + case Neon32:
|
| + Transpose<uint32_t, kSimd128Size>(this, Vd, Vm);
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| }
|
| - default:
|
| - UNREACHABLE();
|
| - break;
|
| }
|
| } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
|
| int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| @@ -5323,41 +5198,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| if (instr->Bit(10) != 0) {
|
| // floating point (clear sign bits)
|
| uint32_t src[4];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| for (int i = 0; i < 4; i++) {
|
| src[i] &= ~0x80000000;
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| } else {
|
| // signed integer
|
| switch (size) {
|
| - case Neon8: {
|
| - int8_t src[16];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 16; i++) {
|
| - src[i] = std::abs(src[i]);
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon8:
|
| + Abs<int8_t, kSimd128Size>(this, Vd, Vm);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - int16_t src[8];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 8; i++) {
|
| - src[i] = std::abs(src[i]);
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon16:
|
| + Abs<int16_t, kSimd128Size>(this, Vd, Vm);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - int32_t src[4];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 4; i++) {
|
| - src[i] = std::abs(src[i]);
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon32:
|
| + Abs<int32_t, kSimd128Size>(this, Vd, Vm);
|
| break;
|
| - }
|
| default:
|
| UNIMPLEMENTED();
|
| break;
|
| @@ -5368,40 +5225,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| if (instr->Bit(10) != 0) {
|
| // floating point (toggle sign bits)
|
| uint32_t src[4];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| for (int i = 0; i < 4; i++) {
|
| src[i] ^= 0x80000000;
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| } else {
|
| // signed integer
|
| switch (size) {
|
| - case Neon8: {
|
| - int8_t src[16];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 16; i++) {
|
| - src[i] = -src[i];
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon8:
|
| + Neg<int8_t, kSimd128Size>(this, Vd, Vm);
|
| break;
|
| - }
|
| case Neon16:
|
| - int16_t src[8];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 8; i++) {
|
| - src[i] = -src[i];
|
| - }
|
| - set_q_register(Vd, src);
|
| + Neg<int16_t, kSimd128Size>(this, Vd, Vm);
|
| break;
|
| - case Neon32: {
|
| - int32_t src[4];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 4; i++) {
|
| - src[i] = -src[i];
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon32:
|
| + Neg<int32_t, kSimd128Size>(this, Vd, Vm);
|
| break;
|
| - }
|
| default:
|
| UNIMPLEMENTED();
|
| break;
|
| @@ -5415,7 +5255,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| int Vd = instr->VFPDRegValue(kSimd128Precision);
|
| int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| uint32_t src[4];
|
| - get_q_register(Vm, src);
|
| + get_neon_register(Vm, src);
|
| if (instr->Bit(7) == 0) {
|
| for (int i = 0; i < 4; i++) {
|
| float denom = bit_cast<float>(src[i]);
|
| @@ -5433,7 +5273,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| src[i] = bit_cast<uint32_t>(result);
|
| }
|
| }
|
| - set_q_register(Vd, src);
|
| + set_neon_register(Vd, src);
|
| } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
|
| instr->Bits(7, 6) != 0) {
|
| // vqmovn.<type><size> Dd, Qm.
|
| @@ -5481,33 +5321,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
| int Vm = instr->VFPMRegValue(kSimd128Precision);
|
| NeonSize ns = static_cast<NeonSize>(size / 16);
|
| switch (ns) {
|
| - case Neon8: {
|
| - uint8_t src[16];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 16; i++) {
|
| - src[i] >>= shift;
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon8:
|
| + ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| - case Neon16: {
|
| - uint16_t src[8];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 8; i++) {
|
| - src[i] >>= shift;
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon16:
|
| + ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| - case Neon32: {
|
| - uint32_t src[4];
|
| - get_q_register(Vm, src);
|
| - for (int i = 0; i < 4; i++) {
|
| - src[i] >>= shift;
|
| - }
|
| - set_q_register(Vd, src);
|
| + case Neon32:
|
| + ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
|
| break;
|
| - }
|
| default:
|
| UNREACHABLE();
|
| break;
|
|
|