| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <stdarg.h> | 5 #include <stdarg.h> |
| 6 #include <stdlib.h> | 6 #include <stdlib.h> |
| 7 #include <cmath> | 7 #include <cmath> |
| 8 | 8 |
| 9 #if V8_TARGET_ARCH_ARM | 9 #if V8_TARGET_ARCH_ARM |
| 10 | 10 |
| (...skipping 878 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); | 889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); |
| 890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); | 890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); |
| 891 } | 891 } |
| 892 | 892 |
| 893 | 893 |
| 894 void Simulator::set_d_register(int dreg, const uint32_t* value) { | 894 void Simulator::set_d_register(int dreg, const uint32_t* value) { |
| 895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); | 895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); |
| 896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); | 896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); |
| 897 } | 897 } |
| 898 | 898 |
| 899 template <typename T> | 899 template <typename T, int SIZE> |
| 900 void Simulator::get_d_register(int dreg, T* value) { | 900 void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) { |
| 901 DCHECK((dreg >= 0) && (dreg < num_d_registers)); | 901 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); |
| 902 memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize); | 902 DCHECK_LE(0, reg); |
| 903 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); |
| 904 memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE); |
| 903 } | 905 } |
| 904 | 906 |
| 905 template <typename T> | 907 template <typename T, int SIZE> |
| 906 void Simulator::set_d_register(int dreg, const T* value) { | 908 void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) { |
| 907 DCHECK((dreg >= 0) && (dreg < num_d_registers)); | 909 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); |
| 908 memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize); | 910 DCHECK_LE(0, reg); |
| 909 } | 911 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); |
| 910 | 912 memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE); |
| 911 template <typename T> | |
| 912 void Simulator::get_q_register(int qreg, T* value) { | |
| 913 DCHECK((qreg >= 0) && (qreg < num_q_registers)); | |
| 914 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size); | |
| 915 } | |
| 916 | |
| 917 template <typename T> | |
| 918 void Simulator::set_q_register(int qreg, const T* value) { | |
| 919 DCHECK((qreg >= 0) && (qreg < num_q_registers)); | |
| 920 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size); | |
| 921 } | 913 } |
| 922 | 914 |
| 923 // Raw access to the PC register. | 915 // Raw access to the PC register. |
| 924 void Simulator::set_pc(int32_t value) { | 916 void Simulator::set_pc(int32_t value) { |
| 925 pc_modified_ = true; | 917 pc_modified_ = true; |
| 926 registers_[pc] = value; | 918 registers_[pc] = value; |
| 927 } | 919 } |
| 928 | 920 |
| 929 | 921 |
| 930 bool Simulator::has_bad_pc() const { | 922 bool Simulator::has_bad_pc() const { |
| (...skipping 2570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3501 case Neon32: { | 3493 case Neon32: { |
| 3502 for (int i = 0; i < 4; i++) { | 3494 for (int i = 0; i < 4; i++) { |
| 3503 q_data[i] = rt_value; | 3495 q_data[i] = rt_value; |
| 3504 } | 3496 } |
| 3505 break; | 3497 break; |
| 3506 } | 3498 } |
| 3507 default: | 3499 default: |
| 3508 UNREACHABLE(); | 3500 UNREACHABLE(); |
| 3509 break; | 3501 break; |
| 3510 } | 3502 } |
| 3511 set_q_register(vd, q_data); | 3503 set_neon_register(vd, q_data); |
| 3512 } | 3504 } |
| 3513 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { | 3505 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { |
| 3514 // vmov (scalar to ARM core register) | 3506 // vmov (scalar to ARM core register) |
| 3515 int vn = instr->VFPNRegValue(kDoublePrecision); | 3507 int vn = instr->VFPNRegValue(kDoublePrecision); |
| 3516 int rt = instr->RtValue(); | 3508 int rt = instr->RtValue(); |
| 3517 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); | 3509 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); |
| 3518 uint64_t data; | 3510 uint64_t data; |
| 3519 get_d_register(vn, &data); | 3511 get_d_register(vn, &data); |
| 3520 if ((opc1_opc2 & 0xb) == 0) { | 3512 if ((opc1_opc2 & 0xb) == 0) { |
| 3521 // NeonS32 / NeonU32 | 3513 // NeonS32 / NeonU32 |
| (...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3988 break; | 3980 break; |
| 3989 default: | 3981 default: |
| 3990 UNIMPLEMENTED(); // Not used by V8. | 3982 UNIMPLEMENTED(); // Not used by V8. |
| 3991 } | 3983 } |
| 3992 } else { | 3984 } else { |
| 3993 UNIMPLEMENTED(); // Not used by V8. | 3985 UNIMPLEMENTED(); // Not used by V8. |
| 3994 } | 3986 } |
| 3995 } | 3987 } |
| 3996 | 3988 |
| 3997 // Templated operations for NEON instructions. | 3989 // Templated operations for NEON instructions. |
| 3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition. | |
| 3999 template <typename T, typename U> | 3990 template <typename T, typename U> |
| 4000 U Widen(T value) { | 3991 U Widen(T value) { |
| 4001 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); | 3992 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
| 4002 static_assert(sizeof(U) > sizeof(T), "T must smaller than U"); | 3993 static_assert(sizeof(U) > sizeof(T), "T must smaller than U"); |
| 4003 return static_cast<U>(value); | 3994 return static_cast<U>(value); |
| 4004 } | 3995 } |
| 4005 | 3996 |
| 4006 template <typename T, typename U> | 3997 template <typename T, typename U> |
| 4007 U Narrow(T value) { | 3998 U Narrow(T value) { |
| 4008 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); | 3999 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); |
| 4009 static_assert(sizeof(U) < sizeof(T), "T must larger than U"); | 4000 static_assert(sizeof(U) < sizeof(T), "T must larger than U"); |
| 4010 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), | 4001 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), |
| 4011 "Signed-ness of T and U must match"); | 4002 "Signed-ness of T and U must match"); |
| 4012 // Make sure value can be expressed in the smaller type; otherwise, the | 4003 // Make sure value can be expressed in the smaller type; otherwise, the |
| 4013 // casted result is implementation defined. | 4004 // casted result is implementation defined. |
| 4014 DCHECK_LE(std::numeric_limits<T>::min(), value); | 4005 DCHECK_LE(std::numeric_limits<T>::min(), value); |
| 4015 DCHECK_GE(std::numeric_limits<T>::max(), value); | 4006 DCHECK_GE(std::numeric_limits<T>::max(), value); |
| 4016 return static_cast<U>(value); | 4007 return static_cast<U>(value); |
| 4017 } | 4008 } |
| 4018 | 4009 |
| 4019 template <typename T> | 4010 template <typename T> |
| 4020 T Clamp(int64_t value) { | 4011 T Clamp(int64_t value) { |
| 4021 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); | 4012 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
| 4022 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); | 4013 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); |
| 4023 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); | 4014 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); |
| 4024 int64_t clamped = std::max(min, std::min(max, value)); | 4015 int64_t clamped = std::max(min, std::min(max, value)); |
| 4025 return static_cast<T>(clamped); | 4016 return static_cast<T>(clamped); |
| 4026 } | 4017 } |
| 4027 | 4018 |
| 4028 template <typename T> | |
| 4029 T MinMax(T a, T b, bool is_min) { | |
| 4030 return is_min ? std::min(a, b) : std::max(a, b); | |
| 4031 } | |
| 4032 | |
| 4033 template <typename T, typename U> | 4019 template <typename T, typename U> |
| 4034 void Widen(Simulator* simulator, int Vd, int Vm) { | 4020 void Widen(Simulator* simulator, int Vd, int Vm) { |
| 4035 static const int kLanes = 8 / sizeof(T); | 4021 static const int kLanes = 8 / sizeof(T); |
| 4036 T src[kLanes]; | 4022 T src[kLanes]; |
| 4037 U dst[kLanes]; | 4023 U dst[kLanes]; |
| 4038 simulator->get_d_register(Vm, src); | 4024 simulator->get_neon_register<T, kDoubleSize>(Vm, src); |
| 4039 for (int i = 0; i < kLanes; i++) { | 4025 for (int i = 0; i < kLanes; i++) { |
| 4040 dst[i] = Widen<T, U>(src[i]); | 4026 dst[i] = Widen<T, U>(src[i]); |
| 4041 } | 4027 } |
| 4042 simulator->set_q_register(Vd, dst); | 4028 simulator->set_neon_register(Vd, dst); |
| 4029 } |
| 4030 |
| 4031 template <typename T, int SIZE> |
| 4032 void Abs(Simulator* simulator, int Vd, int Vm) { |
| 4033 static const int kElems = SIZE / sizeof(T); |
| 4034 T src[kElems]; |
| 4035 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4036 for (int i = 0; i < kElems; i++) { |
| 4037 src[i] = std::abs(src[i]); |
| 4038 } |
| 4039 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4040 } |
| 4041 |
| 4042 template <typename T, int SIZE> |
| 4043 void Neg(Simulator* simulator, int Vd, int Vm) { |
| 4044 static const int kElems = SIZE / sizeof(T); |
| 4045 T src[kElems]; |
| 4046 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4047 for (int i = 0; i < kElems; i++) { |
| 4048 src[i] = -src[i]; |
| 4049 } |
| 4050 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4043 } | 4051 } |
| 4044 | 4052 |
| 4045 template <typename T, typename U> | 4053 template <typename T, typename U> |
| 4046 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { | 4054 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { |
| 4047 static const int kLanes = 16 / sizeof(T); | 4055 static const int kLanes = 16 / sizeof(T); |
| 4048 T src[kLanes]; | 4056 T src[kLanes]; |
| 4049 U dst[kLanes]; | 4057 U dst[kLanes]; |
| 4050 simulator->get_q_register(Vm, src); | 4058 simulator->get_neon_register(Vm, src); |
| 4051 for (int i = 0; i < kLanes; i++) { | 4059 for (int i = 0; i < kLanes; i++) { |
| 4052 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); | 4060 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); |
| 4053 } | 4061 } |
| 4054 simulator->set_d_register(Vd, dst); | 4062 simulator->set_neon_register<U, kDoubleSize>(Vd, dst); |
| 4055 } | 4063 } |
| 4056 | 4064 |
| 4057 template <typename T> | 4065 template <typename T> |
| 4058 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { | 4066 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4059 static const int kLanes = 16 / sizeof(T); | 4067 static const int kLanes = 16 / sizeof(T); |
| 4060 T src1[kLanes], src2[kLanes]; | 4068 T src1[kLanes], src2[kLanes]; |
| 4061 simulator->get_q_register(Vn, src1); | 4069 simulator->get_neon_register(Vn, src1); |
| 4062 simulator->get_q_register(Vm, src2); | 4070 simulator->get_neon_register(Vm, src2); |
| 4063 for (int i = 0; i < kLanes; i++) { | 4071 for (int i = 0; i < kLanes; i++) { |
| 4064 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); | 4072 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); |
| 4065 } | 4073 } |
| 4066 simulator->set_q_register(Vd, src1); | 4074 simulator->set_neon_register(Vd, src1); |
| 4067 } | 4075 } |
| 4068 | 4076 |
| 4069 template <typename T> | 4077 template <typename T> |
| 4070 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { | 4078 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4071 static const int kLanes = 16 / sizeof(T); | 4079 static const int kLanes = 16 / sizeof(T); |
| 4072 T src1[kLanes], src2[kLanes]; | 4080 T src1[kLanes], src2[kLanes]; |
| 4073 simulator->get_q_register(Vn, src1); | 4081 simulator->get_neon_register(Vn, src1); |
| 4074 simulator->get_q_register(Vm, src2); | 4082 simulator->get_neon_register(Vm, src2); |
| 4075 for (int i = 0; i < kLanes; i++) { | 4083 for (int i = 0; i < kLanes; i++) { |
| 4076 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); | 4084 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); |
| 4077 } | 4085 } |
| 4078 simulator->set_q_register(Vd, src1); | 4086 simulator->set_neon_register(Vd, src1); |
| 4087 } |
| 4088 |
| 4089 template <typename T, int SIZE> |
| 4090 void Zip(Simulator* simulator, int Vd, int Vm) { |
| 4091 static const int kElems = SIZE / sizeof(T); |
| 4092 static const int kPairs = kElems / 2; |
| 4093 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; |
| 4094 simulator->get_neon_register<T, SIZE>(Vd, src1); |
| 4095 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4096 for (int i = 0; i < kPairs; i++) { |
| 4097 dst1[i * 2] = src1[i]; |
| 4098 dst1[i * 2 + 1] = src2[i]; |
| 4099 dst2[i * 2] = src1[i + kPairs]; |
| 4100 dst2[i * 2 + 1] = src2[i + kPairs]; |
| 4101 } |
| 4102 simulator->set_neon_register<T, SIZE>(Vd, dst1); |
| 4103 simulator->set_neon_register<T, SIZE>(Vm, dst2); |
| 4104 } |
| 4105 |
| 4106 template <typename T, int SIZE> |
| 4107 void Unzip(Simulator* simulator, int Vd, int Vm) { |
| 4108 static const int kElems = SIZE / sizeof(T); |
| 4109 static const int kPairs = kElems / 2; |
| 4110 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; |
| 4111 simulator->get_neon_register<T, SIZE>(Vd, src1); |
| 4112 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4113 for (int i = 0; i < kPairs; i++) { |
| 4114 dst1[i] = src1[i * 2]; |
| 4115 dst1[i + kPairs] = src2[i * 2]; |
| 4116 dst2[i] = src1[i * 2 + 1]; |
| 4117 dst2[i + kPairs] = src2[i * 2 + 1]; |
| 4118 } |
| 4119 simulator->set_neon_register<T, SIZE>(Vd, dst1); |
| 4120 simulator->set_neon_register<T, SIZE>(Vm, dst2); |
| 4121 } |
| 4122 |
| 4123 template <typename T, int SIZE> |
| 4124 void Transpose(Simulator* simulator, int Vd, int Vm) { |
| 4125 static const int kElems = SIZE / sizeof(T); |
| 4126 static const int kPairs = kElems / 2; |
| 4127 T src1[kElems], src2[kElems]; |
| 4128 simulator->get_neon_register<T, SIZE>(Vd, src1); |
| 4129 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4130 for (int i = 0; i < kPairs; i++) { |
| 4131 std::swap(src1[2 * i + 1], src2[2 * i]); |
| 4132 } |
| 4133 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4134 simulator->set_neon_register<T, SIZE>(Vm, src2); |
| 4135 } |
| 4136 |
| 4137 template <typename T, int SIZE> |
| 4138 void Test(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4139 static const int kElems = SIZE / sizeof(T); |
| 4140 T src1[kElems], src2[kElems]; |
| 4141 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4142 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4143 for (int i = 0; i < kElems; i++) { |
| 4144 src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0; |
| 4145 } |
| 4146 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4147 } |
| 4148 |
| 4149 template <typename T, int SIZE> |
| 4150 void Add(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4151 static const int kElems = SIZE / sizeof(T); |
| 4152 T src1[kElems], src2[kElems]; |
| 4153 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4154 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4155 for (int i = 0; i < kElems; i++) { |
| 4156 src1[i] += src2[i]; |
| 4157 } |
| 4158 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4159 } |
| 4160 |
| 4161 template <typename T, int SIZE> |
| 4162 void Sub(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4163 static const int kElems = SIZE / sizeof(T); |
| 4164 T src1[kElems], src2[kElems]; |
| 4165 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4166 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4167 for (int i = 0; i < kElems; i++) { |
| 4168 src1[i] -= src2[i]; |
| 4169 } |
| 4170 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4171 } |
| 4172 |
| 4173 template <typename T, int SIZE> |
| 4174 void Mul(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4175 static const int kElems = SIZE / sizeof(T); |
| 4176 T src1[kElems], src2[kElems]; |
| 4177 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4178 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4179 for (int i = 0; i < kElems; i++) { |
| 4180 src1[i] *= src2[i]; |
| 4181 } |
| 4182 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4183 } |
| 4184 |
| 4185 template <typename T, int SIZE> |
| 4186 void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) { |
| 4187 static const int kElems = SIZE / sizeof(T); |
| 4188 T src[kElems]; |
| 4189 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4190 for (int i = 0; i < kElems; i++) { |
| 4191 src[i] <<= shift; |
| 4192 } |
| 4193 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4194 } |
| 4195 |
| 4196 template <typename T, int SIZE> |
| 4197 void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { |
| 4198 static const int kElems = SIZE / sizeof(T); |
| 4199 T src[kElems]; |
| 4200 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4201 for (int i = 0; i < kElems; i++) { |
| 4202 src[i] >>= shift; |
| 4203 } |
| 4204 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4205 } |
| 4206 |
| 4207 template <typename T, int SIZE> |
| 4208 void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { |
| 4209 static const int kElems = SIZE / sizeof(T); |
| 4210 T src[kElems]; |
| 4211 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4212 for (int i = 0; i < kElems; i++) { |
| 4213 src[i] = ArithmeticShiftRight(src[i], shift); |
| 4214 } |
| 4215 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4216 } |
| 4217 |
| 4218 template <typename T, int SIZE> |
| 4219 void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4220 static const int kElems = SIZE / sizeof(T); |
| 4221 T src1[kElems], src2[kElems]; |
| 4222 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4223 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4224 for (int i = 0; i < kElems; i++) { |
| 4225 src1[i] = src1[i] == src2[i] ? -1 : 0; |
| 4226 } |
| 4227 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4228 } |
| 4229 |
| 4230 template <typename T, int SIZE> |
| 4231 void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) { |
| 4232 static const int kElems = SIZE / sizeof(T); |
| 4233 T src1[kElems], src2[kElems]; |
| 4234 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4235 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4236 for (int i = 0; i < kElems; i++) { |
| 4237 if (ge) |
| 4238 src1[i] = src1[i] >= src2[i] ? -1 : 0; |
| 4239 else |
| 4240 src1[i] = src1[i] > src2[i] ? -1 : 0; |
| 4241 } |
| 4242 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4243 } |
| 4244 |
| 4245 template <typename T> |
| 4246 T MinMax(T a, T b, bool is_min) { |
| 4247 return is_min ? std::min(a, b) : std::max(a, b); |
| 4248 } |
| 4249 |
| 4250 template <typename T, int SIZE> |
| 4251 void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { |
| 4252 static const int kElems = SIZE / sizeof(T); |
| 4253 T src1[kElems], src2[kElems]; |
| 4254 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4255 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4256 for (int i = 0; i < kElems; i++) { |
| 4257 src1[i] = MinMax(src1[i], src2[i], min); |
| 4258 } |
| 4259 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4260 } |
| 4261 |
| 4262 template <typename T> |
| 4263 void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { |
| 4264 static const int kElems = kDoubleSize / sizeof(T); |
| 4265 static const int kPairs = kElems / 2; |
| 4266 T dst[kElems], src1[kElems], src2[kElems]; |
| 4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1); |
| 4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2); |
| 4269 for (int i = 0; i < kPairs; i++) { |
| 4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
| 4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
| 4272 } |
| 4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst); |
| 4079 } | 4274 } |
| 4080 | 4275 |
| 4081 void Simulator::DecodeSpecialCondition(Instruction* instr) { | 4276 void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| 4082 switch (instr->SpecialValue()) { | 4277 switch (instr->SpecialValue()) { |
| 4083 case 4: { | 4278 case 4: { |
| 4084 int Vd, Vm, Vn; | 4279 int Vd, Vm, Vn; |
| 4085 if (instr->Bit(6) == 0) { | 4280 if (instr->Bit(6) == 0) { |
| 4086 Vd = instr->VFPDRegValue(kDoublePrecision); | 4281 Vd = instr->VFPDRegValue(kDoublePrecision); |
| 4087 Vm = instr->VFPMRegValue(kDoublePrecision); | 4282 Vm = instr->VFPMRegValue(kDoublePrecision); |
| 4088 Vn = instr->VFPNRegValue(kDoublePrecision); | 4283 Vn = instr->VFPNRegValue(kDoublePrecision); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 4114 UNIMPLEMENTED(); | 4309 UNIMPLEMENTED(); |
| 4115 } | 4310 } |
| 4116 break; | 4311 break; |
| 4117 } | 4312 } |
| 4118 case 0x1: { | 4313 case 0x1: { |
| 4119 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && | 4314 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && |
| 4120 instr->Bit(4) == 1) { | 4315 instr->Bit(4) == 1) { |
| 4121 // vmov Qd, Qm. | 4316 // vmov Qd, Qm. |
| 4122 // vorr, Qd, Qm, Qn. | 4317 // vorr, Qd, Qm, Qn. |
| 4123 uint32_t src1[4]; | 4318 uint32_t src1[4]; |
| 4124 get_q_register(Vm, src1); | 4319 get_neon_register(Vm, src1); |
| 4125 if (Vm != Vn) { | 4320 if (Vm != Vn) { |
| 4126 uint32_t src2[4]; | 4321 uint32_t src2[4]; |
| 4127 get_q_register(Vn, src2); | 4322 get_neon_register(Vn, src2); |
| 4128 for (int i = 0; i < 4; i++) { | 4323 for (int i = 0; i < 4; i++) { |
| 4129 src1[i] = src1[i] | src2[i]; | 4324 src1[i] = src1[i] | src2[i]; |
| 4130 } | 4325 } |
| 4131 } | 4326 } |
| 4132 set_q_register(Vd, src1); | 4327 set_neon_register(Vd, src1); |
| 4133 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && | 4328 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && |
| 4134 instr->Bit(4) == 1) { | 4329 instr->Bit(4) == 1) { |
| 4135 // vand Qd, Qm, Qn. | 4330 // vand Qd, Qm, Qn. |
| 4136 uint32_t src1[4], src2[4]; | 4331 uint32_t src1[4], src2[4]; |
| 4137 get_q_register(Vn, src1); | 4332 get_neon_register(Vn, src1); |
| 4138 get_q_register(Vm, src2); | 4333 get_neon_register(Vm, src2); |
| 4139 for (int i = 0; i < 4; i++) { | 4334 for (int i = 0; i < 4; i++) { |
| 4140 src1[i] = src1[i] & src2[i]; | 4335 src1[i] = src1[i] & src2[i]; |
| 4141 } | 4336 } |
| 4142 set_q_register(Vd, src1); | 4337 set_neon_register(Vd, src1); |
| 4143 } else { | 4338 } else { |
| 4144 UNIMPLEMENTED(); | 4339 UNIMPLEMENTED(); |
| 4145 } | 4340 } |
| 4146 break; | 4341 break; |
| 4147 } | 4342 } |
| 4148 case 0x2: { | 4343 case 0x2: { |
| 4149 if (instr->Bit(4) == 1) { | 4344 if (instr->Bit(4) == 1) { |
| 4150 // vqsub.s<size> Qd, Qm, Qn. | 4345 // vqsub.s<size> Qd, Qm, Qn. |
| 4151 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4346 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4152 switch (size) { | 4347 switch (size) { |
| (...skipping 13 matching lines...) Expand all Loading... |
| 4166 } else { | 4361 } else { |
| 4167 UNIMPLEMENTED(); | 4362 UNIMPLEMENTED(); |
| 4168 } | 4363 } |
| 4169 break; | 4364 break; |
| 4170 } | 4365 } |
| 4171 case 0x3: { | 4366 case 0x3: { |
| 4172 // vcge/vcgt.s<size> Qd, Qm, Qn. | 4367 // vcge/vcgt.s<size> Qd, Qm, Qn. |
| 4173 bool ge = instr->Bit(4) == 1; | 4368 bool ge = instr->Bit(4) == 1; |
| 4174 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4369 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4175 switch (size) { | 4370 switch (size) { |
| 4176 case Neon8: { | 4371 case Neon8: |
| 4177 int8_t src1[16], src2[16]; | 4372 CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
| 4178 get_q_register(Vn, src1); | |
| 4179 get_q_register(Vm, src2); | |
| 4180 for (int i = 0; i < 16; i++) { | |
| 4181 if (ge) | |
| 4182 src1[i] = src1[i] >= src2[i] ? 0xFF : 0; | |
| 4183 else | |
| 4184 src1[i] = src1[i] > src2[i] ? 0xFF : 0; | |
| 4185 } | |
| 4186 set_q_register(Vd, src1); | |
| 4187 break; | 4373 break; |
| 4188 } | 4374 case Neon16: |
| 4189 case Neon16: { | 4375 CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
| 4190 int16_t src1[8], src2[8]; | |
| 4191 get_q_register(Vn, src1); | |
| 4192 get_q_register(Vm, src2); | |
| 4193 for (int i = 0; i < 8; i++) { | |
| 4194 if (ge) | |
| 4195 src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; | |
| 4196 else | |
| 4197 src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; | |
| 4198 } | |
| 4199 set_q_register(Vd, src1); | |
| 4200 break; | 4376 break; |
| 4201 } | 4377 case Neon32: |
| 4202 case Neon32: { | 4378 CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
| 4203 int32_t src1[4], src2[4]; | |
| 4204 get_q_register(Vn, src1); | |
| 4205 get_q_register(Vm, src2); | |
| 4206 for (int i = 0; i < 4; i++) { | |
| 4207 if (ge) | |
| 4208 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; | |
| 4209 else | |
| 4210 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; | |
| 4211 } | |
| 4212 set_q_register(Vd, src1); | |
| 4213 break; | 4379 break; |
| 4214 } | |
| 4215 default: | 4380 default: |
| 4216 UNREACHABLE(); | 4381 UNREACHABLE(); |
| 4217 break; | 4382 break; |
| 4218 } | 4383 } |
| 4219 break; | 4384 break; |
| 4220 } | 4385 } |
| 4221 case 0x6: { | 4386 case 0x6: { |
| 4222 // vmin/vmax.s<size> Qd, Qm, Qn. | 4387 // vmin/vmax.s<size> Qd, Qm, Qn. |
| 4223 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4224 bool min = instr->Bit(4) != 0; | 4389 bool min = instr->Bit(4) != 0; |
| 4225 switch (size) { | 4390 switch (size) { |
| 4226 case Neon8: { | 4391 case Neon8: |
| 4227 int8_t src1[16], src2[16]; | 4392 MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
| 4228 get_q_register(Vn, src1); | |
| 4229 get_q_register(Vm, src2); | |
| 4230 for (int i = 0; i < 16; i++) { | |
| 4231 src1[i] = MinMax(src1[i], src2[i], min); | |
| 4232 } | |
| 4233 set_q_register(Vd, src1); | |
| 4234 break; | 4393 break; |
| 4235 } | 4394 case Neon16: |
| 4236 case Neon16: { | 4395 MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
| 4237 int16_t src1[8], src2[8]; | |
| 4238 get_q_register(Vn, src1); | |
| 4239 get_q_register(Vm, src2); | |
| 4240 for (int i = 0; i < 8; i++) { | |
| 4241 src1[i] = MinMax(src1[i], src2[i], min); | |
| 4242 } | |
| 4243 set_q_register(Vd, src1); | |
| 4244 break; | 4396 break; |
| 4245 } | 4397 case Neon32: |
| 4246 case Neon32: { | 4398 MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
| 4247 int32_t src1[4], src2[4]; | |
| 4248 get_q_register(Vn, src1); | |
| 4249 get_q_register(Vm, src2); | |
| 4250 for (int i = 0; i < 4; i++) { | |
| 4251 src1[i] = MinMax(src1[i], src2[i], min); | |
| 4252 } | |
| 4253 set_q_register(Vd, src1); | |
| 4254 break; | 4399 break; |
| 4255 } | |
| 4256 default: | 4400 default: |
| 4257 UNREACHABLE(); | 4401 UNREACHABLE(); |
| 4258 break; | 4402 break; |
| 4259 } | 4403 } |
| 4260 break; | 4404 break; |
| 4261 } | 4405 } |
| 4262 case 0x8: { | 4406 case 0x8: { |
| 4263 // vadd/vtst | 4407 // vadd/vtst |
| 4264 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4408 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4265 if (instr->Bit(4) == 0) { | 4409 if (instr->Bit(4) == 0) { |
| 4266 // vadd.i<size> Qd, Qm, Qn. | 4410 // vadd.i<size> Qd, Qm, Qn. |
| 4267 switch (size) { | 4411 switch (size) { |
| 4268 case Neon8: { | 4412 case Neon8: |
| 4269 uint8_t src1[16], src2[16]; | 4413 Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4270 get_q_register(Vn, src1); | |
| 4271 get_q_register(Vm, src2); | |
| 4272 for (int i = 0; i < 16; i++) { | |
| 4273 src1[i] += src2[i]; | |
| 4274 } | |
| 4275 set_q_register(Vd, src1); | |
| 4276 break; | 4414 break; |
| 4277 } | 4415 case Neon16: |
| 4278 case Neon16: { | 4416 Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4279 uint16_t src1[8], src2[8]; | |
| 4280 get_q_register(Vn, src1); | |
| 4281 get_q_register(Vm, src2); | |
| 4282 for (int i = 0; i < 8; i++) { | |
| 4283 src1[i] += src2[i]; | |
| 4284 } | |
| 4285 set_q_register(Vd, src1); | |
| 4286 break; | 4417 break; |
| 4287 } | 4418 case Neon32: |
| 4288 case Neon32: { | 4419 Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4289 uint32_t src1[4], src2[4]; | |
| 4290 get_q_register(Vn, src1); | |
| 4291 get_q_register(Vm, src2); | |
| 4292 for (int i = 0; i < 4; i++) { | |
| 4293 src1[i] += src2[i]; | |
| 4294 } | |
| 4295 set_q_register(Vd, src1); | |
| 4296 break; | 4420 break; |
| 4297 } | |
| 4298 default: | 4421 default: |
| 4299 UNREACHABLE(); | 4422 UNREACHABLE(); |
| 4300 break; | 4423 break; |
| 4301 } | 4424 } |
| 4302 } else { | 4425 } else { |
| 4303 // vtst.i<size> Qd, Qm, Qn. | 4426 // vtst.i<size> Qd, Qm, Qn. |
| 4304 switch (size) { | 4427 switch (size) { |
| 4305 case Neon8: { | 4428 case Neon8: |
| 4306 uint8_t src1[16], src2[16]; | 4429 Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4307 get_q_register(Vn, src1); | |
| 4308 get_q_register(Vm, src2); | |
| 4309 for (int i = 0; i < 16; i++) { | |
| 4310 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; | |
| 4311 } | |
| 4312 set_q_register(Vd, src1); | |
| 4313 break; | 4430 break; |
| 4314 } | 4431 case Neon16: |
| 4315 case Neon16: { | 4432 Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4316 uint16_t src1[8], src2[8]; | |
| 4317 get_q_register(Vn, src1); | |
| 4318 get_q_register(Vm, src2); | |
| 4319 for (int i = 0; i < 8; i++) { | |
| 4320 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; | |
| 4321 } | |
| 4322 set_q_register(Vd, src1); | |
| 4323 break; | 4433 break; |
| 4324 } | 4434 case Neon32: |
| 4325 case Neon32: { | 4435 Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4326 uint32_t src1[4], src2[4]; | |
| 4327 get_q_register(Vn, src1); | |
| 4328 get_q_register(Vm, src2); | |
| 4329 for (int i = 0; i < 4; i++) { | |
| 4330 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; | |
| 4331 } | |
| 4332 set_q_register(Vd, src1); | |
| 4333 break; | 4436 break; |
| 4334 } | |
| 4335 default: | 4437 default: |
| 4336 UNREACHABLE(); | 4438 UNREACHABLE(); |
| 4337 break; | 4439 break; |
| 4338 } | 4440 } |
| 4339 } | 4441 } |
| 4340 break; | 4442 break; |
| 4341 } | 4443 } |
| 4342 case 0x9: { | 4444 case 0x9: { |
| 4343 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { | 4445 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
| 4344 // vmul.i<size> Qd, Qm, Qn. | 4446 // vmul.i<size> Qd, Qm, Qn. |
| 4345 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4447 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4346 switch (size) { | 4448 switch (size) { |
| 4347 case Neon8: { | 4449 case Neon8: |
| 4348 uint8_t src1[16], src2[16]; | 4450 Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4349 get_q_register(Vn, src1); | |
| 4350 get_q_register(Vm, src2); | |
| 4351 for (int i = 0; i < 16; i++) { | |
| 4352 src1[i] *= src2[i]; | |
| 4353 } | |
| 4354 set_q_register(Vd, src1); | |
| 4355 break; | 4451 break; |
| 4356 } | 4452 case Neon16: |
| 4357 case Neon16: { | 4453 Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4358 uint16_t src1[8], src2[8]; | |
| 4359 get_q_register(Vn, src1); | |
| 4360 get_q_register(Vm, src2); | |
| 4361 for (int i = 0; i < 8; i++) { | |
| 4362 src1[i] *= src2[i]; | |
| 4363 } | |
| 4364 set_q_register(Vd, src1); | |
| 4365 break; | 4454 break; |
| 4366 } | 4455 case Neon32: |
| 4367 case Neon32: { | 4456 Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4368 uint32_t src1[4], src2[4]; | |
| 4369 get_q_register(Vn, src1); | |
| 4370 get_q_register(Vm, src2); | |
| 4371 for (int i = 0; i < 4; i++) { | |
| 4372 src1[i] *= src2[i]; | |
| 4373 } | |
| 4374 set_q_register(Vd, src1); | |
| 4375 break; | 4457 break; |
| 4376 } | |
| 4377 default: | 4458 default: |
| 4378 UNREACHABLE(); | 4459 UNREACHABLE(); |
| 4379 break; | 4460 break; |
| 4380 } | 4461 } |
| 4381 } else { | 4462 } else { |
| 4382 UNIMPLEMENTED(); | 4463 UNIMPLEMENTED(); |
| 4383 } | 4464 } |
| 4384 break; | 4465 break; |
| 4385 } | 4466 } |
| 4386 case 0xa: { | 4467 case 0xa: { |
| 4387 // vpmin/vpmax.s<size> Dd, Dm, Dn. | 4468 // vpmin/vpmax.s<size> Dd, Dm, Dn. |
| 4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4469 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4389 bool min = instr->Bit(4) != 0; | 4470 bool min = instr->Bit(4) != 0; |
| 4390 switch (size) { | 4471 switch (size) { |
| 4391 case Neon8: { | 4472 case Neon8: |
| 4392 int8_t dst[8], src1[8], src2[8]; | 4473 PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min); |
| 4393 get_d_register(Vn, src1); | |
| 4394 get_d_register(Vm, src2); | |
| 4395 for (int i = 0; i < 4; i++) { | |
| 4396 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
| 4397 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
| 4398 } | |
| 4399 set_d_register(Vd, dst); | |
| 4400 break; | 4474 break; |
| 4401 } | 4475 case Neon16: |
| 4402 case Neon16: { | 4476 PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min); |
| 4403 int16_t dst[4], src1[4], src2[4]; | |
| 4404 get_d_register(Vn, src1); | |
| 4405 get_d_register(Vm, src2); | |
| 4406 for (int i = 0; i < 2; i++) { | |
| 4407 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
| 4408 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
| 4409 } | |
| 4410 set_d_register(Vd, dst); | |
| 4411 break; | 4477 break; |
| 4412 } | 4478 case Neon32: |
| 4413 case Neon32: { | 4479 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min); |
| 4414 int32_t dst[2], src1[2], src2[2]; | |
| 4415 get_d_register(Vn, src1); | |
| 4416 get_d_register(Vm, src2); | |
| 4417 dst[0] = MinMax(src1[0], src1[1], min); | |
| 4418 dst[1] = MinMax(src2[0], src2[1], min); | |
| 4419 set_d_register(Vd, dst); | |
| 4420 break; | 4480 break; |
| 4421 } | |
| 4422 default: | 4481 default: |
| 4423 UNREACHABLE(); | 4482 UNREACHABLE(); |
| 4424 break; | 4483 break; |
| 4425 } | 4484 } |
| 4426 break; | 4485 break; |
| 4427 } | 4486 } |
| 4428 case 0xd: { | 4487 case 0xd: { |
| 4429 if (instr->Bit(4) == 0) { | 4488 if (instr->Bit(4) == 0) { |
| 4430 float src1[4], src2[4]; | 4489 float src1[4], src2[4]; |
| 4431 get_q_register(Vn, src1); | 4490 get_neon_register(Vn, src1); |
| 4432 get_q_register(Vm, src2); | 4491 get_neon_register(Vm, src2); |
| 4433 for (int i = 0; i < 4; i++) { | 4492 for (int i = 0; i < 4; i++) { |
| 4434 if (instr->Bit(21) == 0) { | 4493 if (instr->Bit(21) == 0) { |
| 4435 // vadd.f32 Qd, Qm, Qn. | 4494 // vadd.f32 Qd, Qm, Qn. |
| 4436 src1[i] = src1[i] + src2[i]; | 4495 src1[i] = src1[i] + src2[i]; |
| 4437 } else { | 4496 } else { |
| 4438 // vsub.f32 Qd, Qm, Qn. | 4497 // vsub.f32 Qd, Qm, Qn. |
| 4439 src1[i] = src1[i] - src2[i]; | 4498 src1[i] = src1[i] - src2[i]; |
| 4440 } | 4499 } |
| 4441 } | 4500 } |
| 4442 set_q_register(Vd, src1); | 4501 set_neon_register(Vd, src1); |
| 4443 } else { | 4502 } else { |
| 4444 UNIMPLEMENTED(); | 4503 UNIMPLEMENTED(); |
| 4445 } | 4504 } |
| 4446 break; | 4505 break; |
| 4447 } | 4506 } |
| 4448 case 0xe: { | 4507 case 0xe: { |
| 4449 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { | 4508 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { |
| 4450 // vceq.f32. | 4509 // vceq.f32. |
| 4451 float src1[4], src2[4]; | 4510 float src1[4], src2[4]; |
| 4452 get_q_register(Vn, src1); | 4511 get_neon_register(Vn, src1); |
| 4453 get_q_register(Vm, src2); | 4512 get_neon_register(Vm, src2); |
| 4454 uint32_t dst[4]; | 4513 uint32_t dst[4]; |
| 4455 for (int i = 0; i < 4; i++) { | 4514 for (int i = 0; i < 4; i++) { |
| 4456 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; | 4515 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
| 4457 } | 4516 } |
| 4458 set_q_register(Vd, dst); | 4517 set_neon_register(Vd, dst); |
| 4459 } else { | 4518 } else { |
| 4460 UNIMPLEMENTED(); | 4519 UNIMPLEMENTED(); |
| 4461 } | 4520 } |
| 4462 break; | 4521 break; |
| 4463 } | 4522 } |
| 4464 case 0xf: { | 4523 case 0xf: { |
| 4465 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { | 4524 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { |
| 4466 float src1[4], src2[4]; | 4525 float src1[4], src2[4]; |
| 4467 get_q_register(Vn, src1); | 4526 get_neon_register(Vn, src1); |
| 4468 get_q_register(Vm, src2); | 4527 get_neon_register(Vm, src2); |
| 4469 if (instr->Bit(4) == 1) { | 4528 if (instr->Bit(4) == 1) { |
| 4470 if (instr->Bit(21) == 0) { | 4529 if (instr->Bit(21) == 0) { |
| 4471 // vrecps.f32 Qd, Qm, Qn. | 4530 // vrecps.f32 Qd, Qm, Qn. |
| 4472 for (int i = 0; i < 4; i++) { | 4531 for (int i = 0; i < 4; i++) { |
| 4473 src1[i] = 2.0f - src1[i] * src2[i]; | 4532 src1[i] = 2.0f - src1[i] * src2[i]; |
| 4474 } | 4533 } |
| 4475 } else { | 4534 } else { |
| 4476 // vrsqrts.f32 Qd, Qm, Qn. | 4535 // vrsqrts.f32 Qd, Qm, Qn. |
| 4477 for (int i = 0; i < 4; i++) { | 4536 for (int i = 0; i < 4; i++) { |
| 4478 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; | 4537 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; |
| 4479 } | 4538 } |
| 4480 } | 4539 } |
| 4481 } else { | 4540 } else { |
| 4482 // vmin/vmax.f32 Qd, Qm, Qn. | 4541 // vmin/vmax.f32 Qd, Qm, Qn. |
| 4483 bool min = instr->Bit(21) == 1; | 4542 bool min = instr->Bit(21) == 1; |
| 4484 for (int i = 0; i < 4; i++) { | 4543 for (int i = 0; i < 4; i++) { |
| 4485 src1[i] = MinMax(src1[i], src2[i], min); | 4544 src1[i] = MinMax(src1[i], src2[i], min); |
| 4486 } | 4545 } |
| 4487 } | 4546 } |
| 4488 set_q_register(Vd, src1); | 4547 set_neon_register(Vd, src1); |
| 4489 } else { | 4548 } else { |
| 4490 UNIMPLEMENTED(); | 4549 UNIMPLEMENTED(); |
| 4491 } | 4550 } |
| 4492 break; | 4551 break; |
| 4493 } | 4552 } |
| 4494 default: | 4553 default: |
| 4495 UNIMPLEMENTED(); | 4554 UNIMPLEMENTED(); |
| 4496 break; | 4555 break; |
| 4497 } | 4556 } |
| 4498 break; | 4557 break; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 4519 UNIMPLEMENTED(); | 4578 UNIMPLEMENTED(); |
| 4520 break; | 4579 break; |
| 4521 } | 4580 } |
| 4522 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { | 4581 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { |
| 4523 // vext. | 4582 // vext. |
| 4524 int imm4 = instr->Bits(11, 8); | 4583 int imm4 = instr->Bits(11, 8); |
| 4525 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4584 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4526 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4585 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4527 int Vn = instr->VFPNRegValue(kSimd128Precision); | 4586 int Vn = instr->VFPNRegValue(kSimd128Precision); |
| 4528 uint8_t src1[16], src2[16], dst[16]; | 4587 uint8_t src1[16], src2[16], dst[16]; |
| 4529 get_q_register(Vn, src1); | 4588 get_neon_register(Vn, src1); |
| 4530 get_q_register(Vm, src2); | 4589 get_neon_register(Vm, src2); |
| 4531 int boundary = kSimd128Size - imm4; | 4590 int boundary = kSimd128Size - imm4; |
| 4532 int i = 0; | 4591 int i = 0; |
| 4533 for (; i < boundary; i++) { | 4592 for (; i < boundary; i++) { |
| 4534 dst[i] = src1[i + imm4]; | 4593 dst[i] = src1[i + imm4]; |
| 4535 } | 4594 } |
| 4536 for (; i < 16; i++) { | 4595 for (; i < 16; i++) { |
| 4537 dst[i] = src2[i - boundary]; | 4596 dst[i] = src2[i - boundary]; |
| 4538 } | 4597 } |
| 4539 set_q_register(Vd, dst); | 4598 set_neon_register(Vd, dst); |
| 4540 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { | 4599 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { |
| 4541 // vshl.i<size> Qd, Qm, shift | 4600 // vshl.i<size> Qd, Qm, shift |
| 4542 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 4601 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
| 4543 int shift = instr->Bits(21, 16) - size; | 4602 int shift = instr->Bits(21, 16) - size; |
| 4544 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4603 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4545 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4604 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4546 NeonSize ns = static_cast<NeonSize>(size / 16); | 4605 NeonSize ns = static_cast<NeonSize>(size / 16); |
| 4547 switch (ns) { | 4606 switch (ns) { |
| 4548 case Neon8: { | 4607 case Neon8: |
| 4549 uint8_t src[16]; | 4608 ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift); |
| 4550 get_q_register(Vm, src); | |
| 4551 for (int i = 0; i < 16; i++) { | |
| 4552 src[i] <<= shift; | |
| 4553 } | |
| 4554 set_q_register(Vd, src); | |
| 4555 break; | 4609 break; |
| 4556 } | 4610 case Neon16: |
| 4557 case Neon16: { | 4611 ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift); |
| 4558 uint16_t src[8]; | |
| 4559 get_q_register(Vm, src); | |
| 4560 for (int i = 0; i < 8; i++) { | |
| 4561 src[i] <<= shift; | |
| 4562 } | |
| 4563 set_q_register(Vd, src); | |
| 4564 break; | 4612 break; |
| 4565 } | 4613 case Neon32: |
| 4566 case Neon32: { | 4614 ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift); |
| 4567 uint32_t src[4]; | |
| 4568 get_q_register(Vm, src); | |
| 4569 for (int i = 0; i < 4; i++) { | |
| 4570 src[i] <<= shift; | |
| 4571 } | |
| 4572 set_q_register(Vd, src); | |
| 4573 break; | 4615 break; |
| 4574 } | |
| 4575 default: | 4616 default: |
| 4576 UNREACHABLE(); | 4617 UNREACHABLE(); |
| 4577 break; | 4618 break; |
| 4578 } | 4619 } |
| 4579 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { | 4620 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { |
| 4580 // vshr.s<size> Qd, Qm, shift | 4621 // vshr.s<size> Qd, Qm, shift |
| 4581 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 4622 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
| 4582 int shift = 2 * size - instr->Bits(21, 16); | 4623 int shift = 2 * size - instr->Bits(21, 16); |
| 4583 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4624 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4584 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4625 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4585 NeonSize ns = static_cast<NeonSize>(size / 16); | 4626 NeonSize ns = static_cast<NeonSize>(size / 16); |
| 4586 switch (ns) { | 4627 switch (ns) { |
| 4587 case Neon8: { | 4628 case Neon8: |
| 4588 int8_t src[16]; | 4629 ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift); |
| 4589 get_q_register(Vm, src); | |
| 4590 for (int i = 0; i < 16; i++) { | |
| 4591 src[i] = ArithmeticShiftRight(src[i], shift); | |
| 4592 } | |
| 4593 set_q_register(Vd, src); | |
| 4594 break; | 4630 break; |
| 4595 } | 4631 case Neon16: |
| 4596 case Neon16: { | 4632 ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift); |
| 4597 int16_t src[8]; | |
| 4598 get_q_register(Vm, src); | |
| 4599 for (int i = 0; i < 8; i++) { | |
| 4600 src[i] = ArithmeticShiftRight(src[i], shift); | |
| 4601 } | |
| 4602 set_q_register(Vd, src); | |
| 4603 break; | 4633 break; |
| 4604 } | 4634 case Neon32: |
| 4605 case Neon32: { | 4635 ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift); |
| 4606 int32_t src[4]; | |
| 4607 get_q_register(Vm, src); | |
| 4608 for (int i = 0; i < 4; i++) { | |
| 4609 src[i] = ArithmeticShiftRight(src[i], shift); | |
| 4610 } | |
| 4611 set_q_register(Vd, src); | |
| 4612 break; | 4636 break; |
| 4613 } | |
| 4614 default: | 4637 default: |
| 4615 UNREACHABLE(); | 4638 UNREACHABLE(); |
| 4616 break; | 4639 break; |
| 4617 } | 4640 } |
| 4618 } else { | 4641 } else { |
| 4619 UNIMPLEMENTED(); | 4642 UNIMPLEMENTED(); |
| 4620 } | 4643 } |
| 4621 break; | 4644 break; |
| 4622 case 6: { | 4645 case 6: { |
| 4623 int Vd, Vm, Vn; | 4646 int Vd, Vm, Vn; |
| (...skipping 27 matching lines...) Expand all Loading... |
| 4651 } | 4674 } |
| 4652 } else { | 4675 } else { |
| 4653 UNIMPLEMENTED(); | 4676 UNIMPLEMENTED(); |
| 4654 } | 4677 } |
| 4655 break; | 4678 break; |
| 4656 } | 4679 } |
| 4657 case 0x1: { | 4680 case 0x1: { |
| 4658 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { | 4681 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { |
| 4659 // vbsl.size Qd, Qm, Qn. | 4682 // vbsl.size Qd, Qm, Qn. |
| 4660 uint32_t dst[4], src1[4], src2[4]; | 4683 uint32_t dst[4], src1[4], src2[4]; |
| 4661 get_q_register(Vd, dst); | 4684 get_neon_register(Vd, dst); |
| 4662 get_q_register(Vn, src1); | 4685 get_neon_register(Vn, src1); |
| 4663 get_q_register(Vm, src2); | 4686 get_neon_register(Vm, src2); |
| 4664 for (int i = 0; i < 4; i++) { | 4687 for (int i = 0; i < 4; i++) { |
| 4665 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); | 4688 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
| 4666 } | 4689 } |
| 4667 set_q_register(Vd, dst); | 4690 set_neon_register(Vd, dst); |
| 4668 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { | 4691 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { |
| 4669 if (instr->Bit(6) == 0) { | 4692 if (instr->Bit(6) == 0) { |
| 4670 // veor Dd, Dn, Dm | 4693 // veor Dd, Dn, Dm |
| 4671 uint64_t src1, src2; | 4694 uint64_t src1, src2; |
| 4672 get_d_register(Vn, &src1); | 4695 get_d_register(Vn, &src1); |
| 4673 get_d_register(Vm, &src2); | 4696 get_d_register(Vm, &src2); |
| 4674 src1 ^= src2; | 4697 src1 ^= src2; |
| 4675 set_d_register(Vd, &src1); | 4698 set_d_register(Vd, &src1); |
| 4676 | 4699 |
| 4677 } else { | 4700 } else { |
| 4678 // veor Qd, Qn, Qm | 4701 // veor Qd, Qn, Qm |
| 4679 uint32_t src1[4], src2[4]; | 4702 uint32_t src1[4], src2[4]; |
| 4680 get_q_register(Vn, src1); | 4703 get_neon_register(Vn, src1); |
| 4681 get_q_register(Vm, src2); | 4704 get_neon_register(Vm, src2); |
| 4682 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; | 4705 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
| 4683 set_q_register(Vd, src1); | 4706 set_neon_register(Vd, src1); |
| 4684 } | 4707 } |
| 4685 } else { | 4708 } else { |
| 4686 UNIMPLEMENTED(); | 4709 UNIMPLEMENTED(); |
| 4687 } | 4710 } |
| 4688 break; | 4711 break; |
| 4689 } | 4712 } |
| 4690 case 0x2: { | 4713 case 0x2: { |
| 4691 if (instr->Bit(4) == 1) { | 4714 if (instr->Bit(4) == 1) { |
| 4692 // vqsub.u<size> Qd, Qm, Qn. | 4715 // vqsub.u<size> Qd, Qm, Qn. |
| 4693 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 4708 } else { | 4731 } else { |
| 4709 UNIMPLEMENTED(); | 4732 UNIMPLEMENTED(); |
| 4710 } | 4733 } |
| 4711 break; | 4734 break; |
| 4712 } | 4735 } |
| 4713 case 0x3: { | 4736 case 0x3: { |
| 4714 // vcge/vcgt.u<size> Qd, Qm, Qn. | 4737 // vcge/vcgt.u<size> Qd, Qm, Qn. |
| 4715 bool ge = instr->Bit(4) == 1; | 4738 bool ge = instr->Bit(4) == 1; |
| 4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4739 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4717 switch (size) { | 4740 switch (size) { |
| 4718 case Neon8: { | 4741 case Neon8: |
| 4719 uint8_t src1[16], src2[16]; | 4742 CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
| 4720 get_q_register(Vn, src1); | |
| 4721 get_q_register(Vm, src2); | |
| 4722 for (int i = 0; i < 16; i++) { | |
| 4723 if (ge) | |
| 4724 src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; | |
| 4725 else | |
| 4726 src1[i] = src1[i] > src2[i] ? 0xFFu : 0; | |
| 4727 } | |
| 4728 set_q_register(Vd, src1); | |
| 4729 break; | 4743 break; |
| 4730 } | 4744 case Neon16: |
| 4731 case Neon16: { | 4745 CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
| 4732 uint16_t src1[8], src2[8]; | |
| 4733 get_q_register(Vn, src1); | |
| 4734 get_q_register(Vm, src2); | |
| 4735 for (int i = 0; i < 8; i++) { | |
| 4736 if (ge) | |
| 4737 src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; | |
| 4738 else | |
| 4739 src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; | |
| 4740 } | |
| 4741 set_q_register(Vd, src1); | |
| 4742 break; | 4746 break; |
| 4743 } | 4747 case Neon32: |
| 4744 case Neon32: { | 4748 CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
| 4745 uint32_t src1[4], src2[4]; | |
| 4746 get_q_register(Vn, src1); | |
| 4747 get_q_register(Vm, src2); | |
| 4748 for (int i = 0; i < 4; i++) { | |
| 4749 if (ge) | |
| 4750 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; | |
| 4751 else | |
| 4752 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; | |
| 4753 } | |
| 4754 set_q_register(Vd, src1); | |
| 4755 break; | 4749 break; |
| 4756 } | |
| 4757 default: | 4750 default: |
| 4758 UNREACHABLE(); | 4751 UNREACHABLE(); |
| 4759 break; | 4752 break; |
| 4760 } | 4753 } |
| 4761 break; | 4754 break; |
| 4762 } | 4755 } |
| 4763 case 0x6: { | 4756 case 0x6: { |
| 4764 // vmin/vmax.u<size> Qd, Qm, Qn. | 4757 // vmin/vmax.u<size> Qd, Qm, Qn. |
| 4765 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4758 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4766 bool min = instr->Bit(4) != 0; | 4759 bool min = instr->Bit(4) != 0; |
| 4767 switch (size) { | 4760 switch (size) { |
| 4768 case Neon8: { | 4761 case Neon8: |
| 4769 uint8_t src1[16], src2[16]; | 4762 MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
| 4770 get_q_register(Vn, src1); | |
| 4771 get_q_register(Vm, src2); | |
| 4772 for (int i = 0; i < 16; i++) { | |
| 4773 src1[i] = MinMax(src1[i], src2[i], min); | |
| 4774 } | |
| 4775 set_q_register(Vd, src1); | |
| 4776 break; | 4763 break; |
| 4777 } | 4764 case Neon16: |
| 4778 case Neon16: { | 4765 MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
| 4779 uint16_t src1[8], src2[8]; | |
| 4780 get_q_register(Vn, src1); | |
| 4781 get_q_register(Vm, src2); | |
| 4782 for (int i = 0; i < 8; i++) { | |
| 4783 src1[i] = MinMax(src1[i], src2[i], min); | |
| 4784 } | |
| 4785 set_q_register(Vd, src1); | |
| 4786 break; | 4766 break; |
| 4787 } | 4767 case Neon32: |
| 4788 case Neon32: { | 4768 MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
| 4789 uint32_t src1[4], src2[4]; | |
| 4790 get_q_register(Vn, src1); | |
| 4791 get_q_register(Vm, src2); | |
| 4792 for (int i = 0; i < 4; i++) { | |
| 4793 src1[i] = MinMax(src1[i], src2[i], min); | |
| 4794 } | |
| 4795 set_q_register(Vd, src1); | |
| 4796 break; | 4769 break; |
| 4797 } | |
| 4798 default: | 4770 default: |
| 4799 UNREACHABLE(); | 4771 UNREACHABLE(); |
| 4800 break; | 4772 break; |
| 4801 } | 4773 } |
| 4802 break; | 4774 break; |
| 4803 } | 4775 } |
| 4804 case 0x8: { | 4776 case 0x8: { |
| 4805 if (instr->Bit(4) == 0) { | 4777 if (instr->Bit(4) == 0) { |
| 4806 // vsub.size Qd, Qm, Qn. | 4778 // vsub.size Qd, Qm, Qn. |
| 4807 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4779 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4808 switch (size) { | 4780 switch (size) { |
| 4809 case Neon8: { | 4781 case Neon8: |
| 4810 uint8_t src1[16], src2[16]; | 4782 Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4811 get_q_register(Vn, src1); | |
| 4812 get_q_register(Vm, src2); | |
| 4813 for (int i = 0; i < 16; i++) { | |
| 4814 src1[i] -= src2[i]; | |
| 4815 } | |
| 4816 set_q_register(Vd, src1); | |
| 4817 break; | 4783 break; |
| 4818 } | 4784 case Neon16: |
| 4819 case Neon16: { | 4785 Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4820 uint16_t src1[8], src2[8]; | |
| 4821 get_q_register(Vn, src1); | |
| 4822 get_q_register(Vm, src2); | |
| 4823 for (int i = 0; i < 8; i++) { | |
| 4824 src1[i] -= src2[i]; | |
| 4825 } | |
| 4826 set_q_register(Vd, src1); | |
| 4827 break; | 4786 break; |
| 4828 } | 4787 case Neon32: |
| 4829 case Neon32: { | 4788 Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4830 uint32_t src1[4], src2[4]; | |
| 4831 get_q_register(Vn, src1); | |
| 4832 get_q_register(Vm, src2); | |
| 4833 for (int i = 0; i < 4; i++) { | |
| 4834 src1[i] -= src2[i]; | |
| 4835 } | |
| 4836 set_q_register(Vd, src1); | |
| 4837 break; | 4789 break; |
| 4838 } | |
| 4839 default: | 4790 default: |
| 4840 UNREACHABLE(); | 4791 UNREACHABLE(); |
| 4841 break; | 4792 break; |
| 4842 } | 4793 } |
| 4843 } else { | 4794 } else { |
| 4844 // vceq.size Qd, Qm, Qn. | 4795 // vceq.size Qd, Qm, Qn. |
| 4845 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4796 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4846 switch (size) { | 4797 switch (size) { |
| 4847 case Neon8: { | 4798 case Neon8: |
| 4848 uint8_t src1[16], src2[16]; | 4799 CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4849 get_q_register(Vn, src1); | |
| 4850 get_q_register(Vm, src2); | |
| 4851 for (int i = 0; i < 16; i++) { | |
| 4852 src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; | |
| 4853 } | |
| 4854 set_q_register(Vd, src1); | |
| 4855 break; | 4800 break; |
| 4856 } | 4801 case Neon16: |
| 4857 case Neon16: { | 4802 CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4858 uint16_t src1[8], src2[8]; | |
| 4859 get_q_register(Vn, src1); | |
| 4860 get_q_register(Vm, src2); | |
| 4861 for (int i = 0; i < 8; i++) { | |
| 4862 src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; | |
| 4863 } | |
| 4864 set_q_register(Vd, src1); | |
| 4865 break; | 4803 break; |
| 4866 } | 4804 case Neon32: |
| 4867 case Neon32: { | 4805 CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
| 4868 uint32_t src1[4], src2[4]; | |
| 4869 get_q_register(Vn, src1); | |
| 4870 get_q_register(Vm, src2); | |
| 4871 for (int i = 0; i < 4; i++) { | |
| 4872 src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; | |
| 4873 } | |
| 4874 set_q_register(Vd, src1); | |
| 4875 break; | 4806 break; |
| 4876 } | |
| 4877 default: | 4807 default: |
| 4878 UNREACHABLE(); | 4808 UNREACHABLE(); |
| 4879 break; | 4809 break; |
| 4880 } | 4810 } |
| 4881 } | 4811 } |
| 4882 break; | 4812 break; |
| 4883 } | 4813 } |
| 4884 case 0xa: { | 4814 case 0xa: { |
| 4885 // vpmin/vpmax.u<size> Dd, Dm, Dn. | 4815 // vpmin/vpmax.u<size> Dd, Dm, Dn. |
| 4886 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4816 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 4887 bool min = instr->Bit(4) != 0; | 4817 bool min = instr->Bit(4) != 0; |
| 4888 switch (size) { | 4818 switch (size) { |
| 4889 case Neon8: { | 4819 case Neon8: |
| 4890 uint8_t dst[8], src1[8], src2[8]; | 4820 PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min); |
| 4891 get_d_register(Vn, src1); | |
| 4892 get_d_register(Vm, src2); | |
| 4893 for (int i = 0; i < 4; i++) { | |
| 4894 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
| 4895 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
| 4896 } | |
| 4897 set_d_register(Vd, dst); | |
| 4898 break; | 4821 break; |
| 4899 } | 4822 case Neon16: |
| 4900 case Neon16: { | 4823 PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min); |
| 4901 uint16_t dst[4], src1[4], src2[4]; | |
| 4902 get_d_register(Vn, src1); | |
| 4903 get_d_register(Vm, src2); | |
| 4904 for (int i = 0; i < 2; i++) { | |
| 4905 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
| 4906 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
| 4907 } | |
| 4908 set_d_register(Vd, dst); | |
| 4909 break; | 4824 break; |
| 4910 } | 4825 case Neon32: |
| 4911 case Neon32: { | 4826 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min); |
| 4912 uint32_t dst[2], src1[2], src2[2]; | |
| 4913 get_d_register(Vn, src1); | |
| 4914 get_d_register(Vm, src2); | |
| 4915 dst[0] = MinMax(src1[0], src1[1], min); | |
| 4916 dst[1] = MinMax(src2[0], src2[1], min); | |
| 4917 set_d_register(Vd, dst); | |
| 4918 break; | 4827 break; |
| 4919 } | |
| 4920 default: | 4828 default: |
| 4921 UNREACHABLE(); | 4829 UNREACHABLE(); |
| 4922 break; | 4830 break; |
| 4923 } | 4831 } |
| 4924 break; | 4832 break; |
| 4925 } | 4833 } |
| 4926 case 0xd: { | 4834 case 0xd: { |
| 4927 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { | 4835 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
| 4928 // vmul.f32 Qd, Qn, Qm | 4836 // vmul.f32 Qd, Qn, Qm |
| 4929 float src1[4], src2[4]; | 4837 float src1[4], src2[4]; |
| 4930 get_q_register(Vn, src1); | 4838 get_neon_register(Vn, src1); |
| 4931 get_q_register(Vm, src2); | 4839 get_neon_register(Vm, src2); |
| 4932 for (int i = 0; i < 4; i++) { | 4840 for (int i = 0; i < 4; i++) { |
| 4933 src1[i] = src1[i] * src2[i]; | 4841 src1[i] = src1[i] * src2[i]; |
| 4934 } | 4842 } |
| 4935 set_q_register(Vd, src1); | 4843 set_neon_register(Vd, src1); |
| 4936 } else { | 4844 } else { |
| 4937 UNIMPLEMENTED(); | 4845 UNIMPLEMENTED(); |
| 4938 } | 4846 } |
| 4939 break; | 4847 break; |
| 4940 } | 4848 } |
| 4941 case 0xe: { | 4849 case 0xe: { |
| 4942 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { | 4850 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { |
| 4943 // vcge/vcgt.f32 Qd, Qm, Qn | 4851 // vcge/vcgt.f32 Qd, Qm, Qn |
| 4944 bool ge = instr->Bit(21) == 0; | 4852 bool ge = instr->Bit(21) == 0; |
| 4945 float src1[4], src2[4]; | 4853 float src1[4], src2[4]; |
| 4946 get_q_register(Vn, src1); | 4854 get_neon_register(Vn, src1); |
| 4947 get_q_register(Vm, src2); | 4855 get_neon_register(Vm, src2); |
| 4948 uint32_t dst[4]; | 4856 uint32_t dst[4]; |
| 4949 for (int i = 0; i < 4; i++) { | 4857 for (int i = 0; i < 4; i++) { |
| 4950 if (ge) { | 4858 if (ge) { |
| 4951 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; | 4859 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
| 4952 } else { | 4860 } else { |
| 4953 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; | 4861 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
| 4954 } | 4862 } |
| 4955 } | 4863 } |
| 4956 set_q_register(Vd, dst); | 4864 set_neon_register(Vd, dst); |
| 4957 } else { | 4865 } else { |
| 4958 UNIMPLEMENTED(); | 4866 UNIMPLEMENTED(); |
| 4959 } | 4867 } |
| 4960 break; | 4868 break; |
| 4961 } | 4869 } |
| 4962 default: | 4870 default: |
| 4963 UNREACHABLE(); | 4871 UNREACHABLE(); |
| 4964 break; | 4872 break; |
| 4965 } | 4873 } |
| 4966 break; | 4874 break; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 4987 UNIMPLEMENTED(); | 4895 UNIMPLEMENTED(); |
| 4988 break; | 4896 break; |
| 4989 } | 4897 } |
| 4990 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { | 4898 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { |
| 4991 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && | 4899 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && |
| 4992 instr->Bit(6) == 1) { | 4900 instr->Bit(6) == 1) { |
| 4993 // vcvt.<Td>.<Tm> Qd, Qm. | 4901 // vcvt.<Td>.<Tm> Qd, Qm. |
| 4994 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4902 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4995 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4903 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4996 uint32_t q_data[4]; | 4904 uint32_t q_data[4]; |
| 4997 get_q_register(Vm, q_data); | 4905 get_neon_register(Vm, q_data); |
| 4998 int op = instr->Bits(8, 7); | 4906 int op = instr->Bits(8, 7); |
| 4999 for (int i = 0; i < 4; i++) { | 4907 for (int i = 0; i < 4; i++) { |
| 5000 switch (op) { | 4908 switch (op) { |
| 5001 case 0: | 4909 case 0: |
| 5002 // f32 <- s32, round towards nearest. | 4910 // f32 <- s32, round towards nearest. |
| 5003 q_data[i] = bit_cast<uint32_t>(std::round( | 4911 q_data[i] = bit_cast<uint32_t>(std::round( |
| 5004 static_cast<float>(bit_cast<int32_t>(q_data[i])))); | 4912 static_cast<float>(bit_cast<int32_t>(q_data[i])))); |
| 5005 break; | 4913 break; |
| 5006 case 1: | 4914 case 1: |
| 5007 // f32 <- u32, round towards nearest. | 4915 // f32 <- u32, round towards nearest. |
| 5008 q_data[i] = bit_cast<uint32_t>( | 4916 q_data[i] = bit_cast<uint32_t>( |
| 5009 std::round(static_cast<float>(q_data[i]))); | 4917 std::round(static_cast<float>(q_data[i]))); |
| 5010 break; | 4918 break; |
| 5011 case 2: | 4919 case 2: |
| 5012 // s32 <- f32, round to zero. | 4920 // s32 <- f32, round to zero. |
| 5013 q_data[i] = static_cast<uint32_t>( | 4921 q_data[i] = static_cast<uint32_t>( |
| 5014 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); | 4922 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); |
| 5015 break; | 4923 break; |
| 5016 case 3: | 4924 case 3: |
| 5017 // u32 <- f32, round to zero. | 4925 // u32 <- f32, round to zero. |
| 5018 q_data[i] = static_cast<uint32_t>( | 4926 q_data[i] = static_cast<uint32_t>( |
| 5019 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); | 4927 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); |
| 5020 break; | 4928 break; |
| 5021 } | 4929 } |
| 5022 } | 4930 } |
| 5023 set_q_register(Vd, q_data); | 4931 set_neon_register(Vd, q_data); |
| 5024 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { | 4932 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { |
| 5025 if (instr->Bit(6) == 0) { | 4933 if (instr->Bit(6) == 0) { |
| 5026 // vswp Dd, Dm. | 4934 // vswp Dd, Dm. |
| 5027 uint64_t dval, mval; | 4935 uint64_t dval, mval; |
| 5028 int vd = instr->VFPDRegValue(kDoublePrecision); | 4936 int vd = instr->VFPDRegValue(kDoublePrecision); |
| 5029 int vm = instr->VFPMRegValue(kDoublePrecision); | 4937 int vm = instr->VFPMRegValue(kDoublePrecision); |
| 5030 get_d_register(vd, &dval); | 4938 get_d_register(vd, &dval); |
| 5031 get_d_register(vm, &mval); | 4939 get_d_register(vm, &mval); |
| 5032 set_d_register(vm, &dval); | 4940 set_d_register(vm, &dval); |
| 5033 set_d_register(vd, &mval); | 4941 set_d_register(vd, &mval); |
| 5034 } else { | 4942 } else { |
| 5035 // vswp Qd, Qm. | 4943 // vswp Qd, Qm. |
| 5036 uint32_t dval[4], mval[4]; | 4944 uint32_t dval[4], mval[4]; |
| 5037 int vd = instr->VFPDRegValue(kSimd128Precision); | 4945 int vd = instr->VFPDRegValue(kSimd128Precision); |
| 5038 int vm = instr->VFPMRegValue(kSimd128Precision); | 4946 int vm = instr->VFPMRegValue(kSimd128Precision); |
| 5039 get_q_register(vd, dval); | 4947 get_neon_register(vd, dval); |
| 5040 get_q_register(vm, mval); | 4948 get_neon_register(vm, mval); |
| 5041 set_q_register(vm, dval); | 4949 set_neon_register(vm, dval); |
| 5042 set_q_register(vd, mval); | 4950 set_neon_register(vd, mval); |
| 5043 } | 4951 } |
| 5044 } else if (instr->Bits(11, 7) == 0x18) { | 4952 } else if (instr->Bits(11, 7) == 0x18) { |
| 5045 // vdup.32 Qd, Sm. | 4953 // vdup.32 Qd, Sm. |
| 5046 int vd = instr->VFPDRegValue(kSimd128Precision); | 4954 int vd = instr->VFPDRegValue(kSimd128Precision); |
| 5047 int vm = instr->VFPMRegValue(kDoublePrecision); | 4955 int vm = instr->VFPMRegValue(kDoublePrecision); |
| 5048 int index = instr->Bit(19); | 4956 int index = instr->Bit(19); |
| 5049 uint32_t s_data = get_s_register(vm * 2 + index); | 4957 uint32_t s_data = get_s_register(vm * 2 + index); |
| 5050 uint32_t q_data[4]; | 4958 uint32_t q_data[4]; |
| 5051 for (int i = 0; i < 4; i++) q_data[i] = s_data; | 4959 for (int i = 0; i < 4; i++) q_data[i] = s_data; |
| 5052 set_q_register(vd, q_data); | 4960 set_neon_register(vd, q_data); |
| 5053 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { | 4961 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { |
| 5054 // vmvn Qd, Qm. | 4962 // vmvn Qd, Qm. |
| 5055 int vd = instr->VFPDRegValue(kSimd128Precision); | 4963 int vd = instr->VFPDRegValue(kSimd128Precision); |
| 5056 int vm = instr->VFPMRegValue(kSimd128Precision); | 4964 int vm = instr->VFPMRegValue(kSimd128Precision); |
| 5057 uint32_t q_data[4]; | 4965 uint32_t q_data[4]; |
| 5058 get_q_register(vm, q_data); | 4966 get_neon_register(vm, q_data); |
| 5059 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; | 4967 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
| 5060 set_q_register(vd, q_data); | 4968 set_neon_register(vd, q_data); |
| 5061 } else if (instr->Bits(11, 10) == 0x2) { | 4969 } else if (instr->Bits(11, 10) == 0x2) { |
| 5062 // vtb[l,x] Dd, <list>, Dm. | 4970 // vtb[l,x] Dd, <list>, Dm. |
| 5063 int vd = instr->VFPDRegValue(kDoublePrecision); | 4971 int vd = instr->VFPDRegValue(kDoublePrecision); |
| 5064 int vn = instr->VFPNRegValue(kDoublePrecision); | 4972 int vn = instr->VFPNRegValue(kDoublePrecision); |
| 5065 int vm = instr->VFPMRegValue(kDoublePrecision); | 4973 int vm = instr->VFPMRegValue(kDoublePrecision); |
| 5066 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; | 4974 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
| 5067 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx | 4975 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
| 5068 uint64_t destination = 0, indices = 0, result = 0; | 4976 uint64_t destination = 0, indices = 0, result = 0; |
| 5069 get_d_register(vd, &destination); | 4977 get_d_register(vd, &destination); |
| 5070 get_d_register(vm, &indices); | 4978 get_d_register(vm, &indices); |
| 5071 for (int i = 0; i < kDoubleSize; i++) { | 4979 for (int i = 0; i < kDoubleSize; i++) { |
| 5072 int shift = i * kBitsPerByte; | 4980 int shift = i * kBitsPerByte; |
| 5073 int index = (indices >> shift) & 0xFF; | 4981 int index = (indices >> shift) & 0xFF; |
| 5074 if (index < table_len) { | 4982 if (index < table_len) { |
| 5075 uint64_t table; | 4983 uint64_t table; |
| 5076 get_d_register(vn + index / kDoubleSize, &table); | 4984 get_d_register(vn + index / kDoubleSize, &table); |
| 5077 result |= | 4985 result |= |
| 5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) | 4986 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
| 5079 << shift; | 4987 << shift; |
| 5080 } else if (vtbx) { | 4988 } else if (vtbx) { |
| 5081 result |= destination & (0xFFull << shift); | 4989 result |= destination & (0xFFull << shift); |
| 5082 } | 4990 } |
| 5083 } | 4991 } |
| 5084 set_d_register(vd, &result); | 4992 set_d_register(vd, &result); |
| 5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && | 4993 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) { |
| 5086 instr->Bit(6) == 1) { | |
| 5087 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 4994 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 5088 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4995 if (instr->Bit(6) == 0) { |
| 5089 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4996 int Vd = instr->VFPDRegValue(kDoublePrecision); |
| 5090 if (instr->Bit(7) == 1) { | 4997 int Vm = instr->VFPMRegValue(kDoublePrecision); |
| 5091 // vzip.<size> Qd, Qm. | 4998 if (instr->Bit(7) == 1) { |
| 5092 switch (size) { | 4999 // vzip.<size> Dd, Dm. |
| 5093 case Neon8: { | 5000 switch (size) { |
| 5094 uint8_t src1[16], src2[16], dst1[16], dst2[16]; | 5001 case Neon8: |
| 5095 get_q_register(Vd, src1); | 5002 Zip<uint8_t, kDoubleSize>(this, Vd, Vm); |
| 5096 get_q_register(Vm, src2); | 5003 break; |
| 5097 for (int i = 0; i < 8; i++) { | 5004 case Neon16: |
| 5098 dst1[i * 2] = src1[i]; | 5005 Zip<uint16_t, kDoubleSize>(this, Vd, Vm); |
| 5099 dst1[i * 2 + 1] = src2[i]; | 5006 break; |
| 5100 dst2[i * 2] = src1[i + 8]; | 5007 case Neon32: |
| 5101 dst2[i * 2 + 1] = src2[i + 8]; | 5008 UNIMPLEMENTED(); |
| 5102 } | 5009 break; |
| 5103 set_q_register(Vd, dst1); | 5010 default: |
| 5104 set_q_register(Vm, dst2); | 5011 UNREACHABLE(); |
| 5105 break; | 5012 break; |
| 5106 } | 5013 } |
| 5107 case Neon16: { | 5014 } else { |
| 5108 uint16_t src1[8], src2[8], dst1[8], dst2[8]; | 5015 // vuzp.<size> Dd, Dm. |
| 5109 get_q_register(Vd, src1); | 5016 switch (size) { |
| 5110 get_q_register(Vm, src2); | 5017 case Neon8: |
| 5111 for (int i = 0; i < 4; i++) { | 5018 Unzip<uint8_t, kDoubleSize>(this, Vd, Vm); |
| 5112 dst1[i * 2] = src1[i]; | 5019 break; |
| 5113 dst1[i * 2 + 1] = src2[i]; | 5020 case Neon16: |
| 5114 dst2[i * 2] = src1[i + 4]; | 5021 Unzip<uint16_t, kDoubleSize>(this, Vd, Vm); |
| 5115 dst2[i * 2 + 1] = src2[i + 4]; | 5022 break; |
| 5116 } | 5023 case Neon32: |
| 5117 set_q_register(Vd, dst1); | 5024 UNIMPLEMENTED(); |
| 5118 set_q_register(Vm, dst2); | 5025 break; |
| 5119 break; | 5026 default: |
| 5027 UNREACHABLE(); |
| 5028 break; |
| 5120 } | 5029 } |
| 5121 case Neon32: { | |
| 5122 uint32_t src1[4], src2[4], dst1[4], dst2[4]; | |
| 5123 get_q_register(Vd, src1); | |
| 5124 get_q_register(Vm, src2); | |
| 5125 for (int i = 0; i < 2; i++) { | |
| 5126 dst1[i * 2] = src1[i]; | |
| 5127 dst1[i * 2 + 1] = src2[i]; | |
| 5128 dst2[i * 2] = src1[i + 2]; | |
| 5129 dst2[i * 2 + 1] = src2[i + 2]; | |
| 5130 } | |
| 5131 set_q_register(Vd, dst1); | |
| 5132 set_q_register(Vm, dst2); | |
| 5133 break; | |
| 5134 } | |
| 5135 default: | |
| 5136 UNREACHABLE(); | |
| 5137 break; | |
| 5138 } | 5030 } |
| 5139 } else { | 5031 } else { |
| 5140 // vuzp.<size> Qd, Qm. | 5032 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 5141 switch (size) { | 5033 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 5142 case Neon8: { | 5034 if (instr->Bit(7) == 1) { |
| 5143 uint8_t src1[16], src2[16], dst1[16], dst2[16]; | 5035 // vzip.<size> Qd, Qm. |
| 5144 get_q_register(Vd, src1); | 5036 switch (size) { |
| 5145 get_q_register(Vm, src2); | 5037 case Neon8: |
| 5146 for (int i = 0; i < 8; i++) { | 5038 Zip<uint8_t, kSimd128Size>(this, Vd, Vm); |
| 5147 dst1[i] = src1[i * 2]; | 5039 break; |
| 5148 dst1[i + 8] = src2[i * 2]; | 5040 case Neon16: |
| 5149 dst2[i] = src1[i * 2 + 1]; | 5041 Zip<uint16_t, kSimd128Size>(this, Vd, Vm); |
| 5150 dst2[i + 8] = src2[i * 2 + 1]; | 5042 break; |
| 5151 } | 5043 case Neon32: |
| 5152 set_q_register(Vd, dst1); | 5044 Zip<uint32_t, kSimd128Size>(this, Vd, Vm); |
| 5153 set_q_register(Vm, dst2); | 5045 break; |
| 5154 break; | 5046 default: |
| 5047 UNREACHABLE(); |
| 5048 break; |
| 5155 } | 5049 } |
| 5156 case Neon16: { | 5050 } else { |
| 5157 uint16_t src1[8], src2[8], dst1[8], dst2[8]; | 5051 // vuzp.<size> Qd, Qm. |
| 5158 get_q_register(Vd, src1); | 5052 switch (size) { |
| 5159 get_q_register(Vm, src2); | 5053 case Neon8: |
| 5160 for (int i = 0; i < 4; i++) { | 5054 Unzip<uint8_t, kSimd128Size>(this, Vd, Vm); |
| 5161 dst1[i] = src1[i * 2]; | 5055 break; |
| 5162 dst1[i + 4] = src2[i * 2]; | 5056 case Neon16: |
| 5163 dst2[i] = src1[i * 2 + 1]; | 5057 Unzip<uint16_t, kSimd128Size>(this, Vd, Vm); |
| 5164 dst2[i + 4] = src2[i * 2 + 1]; | 5058 break; |
| 5165 } | 5059 case Neon32: |
| 5166 set_q_register(Vd, dst1); | 5060 Unzip<uint32_t, kSimd128Size>(this, Vd, Vm); |
| 5167 set_q_register(Vm, dst2); | 5061 break; |
| 5168 break; | 5062 default: |
| 5063 UNREACHABLE(); |
| 5064 break; |
| 5169 } | 5065 } |
| 5170 case Neon32: { | |
| 5171 uint32_t src1[4], src2[4], dst1[4], dst2[4]; | |
| 5172 get_q_register(Vd, src1); | |
| 5173 get_q_register(Vm, src2); | |
| 5174 for (int i = 0; i < 2; i++) { | |
| 5175 dst1[i] = src1[i * 2]; | |
| 5176 dst1[i + 2] = src2[i * 2]; | |
| 5177 dst2[i] = src1[i * 2 + 1]; | |
| 5178 dst2[i + 2] = src2[i * 2 + 1]; | |
| 5179 } | |
| 5180 set_q_register(Vd, dst1); | |
| 5181 set_q_register(Vm, dst2); | |
| 5182 break; | |
| 5183 } | |
| 5184 default: | |
| 5185 UNREACHABLE(); | |
| 5186 break; | |
| 5187 } | 5066 } |
| 5188 } | 5067 } |
| 5189 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { | 5068 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
| 5190 // vrev<op>.size Qd, Qm | 5069 // vrev<op>.size Qd, Qm |
| 5191 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5070 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 5192 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5071 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 5193 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5072 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 5194 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - | 5073 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - |
| 5195 instr->Bits(8, 7)); | 5074 instr->Bits(8, 7)); |
| 5196 switch (op) { | 5075 switch (op) { |
| 5197 case Neon16: { | 5076 case Neon16: { |
| 5198 DCHECK_EQ(Neon8, size); | 5077 DCHECK_EQ(Neon8, size); |
| 5199 uint8_t src[16]; | 5078 uint8_t src[16]; |
| 5200 get_q_register(Vm, src); | 5079 get_neon_register(Vm, src); |
| 5201 for (int i = 0; i < 16; i += 2) { | 5080 for (int i = 0; i < 16; i += 2) { |
| 5202 std::swap(src[i], src[i + 1]); | 5081 std::swap(src[i], src[i + 1]); |
| 5203 } | 5082 } |
| 5204 set_q_register(Vd, src); | 5083 set_neon_register(Vd, src); |
| 5205 break; | 5084 break; |
| 5206 } | 5085 } |
| 5207 case Neon32: { | 5086 case Neon32: { |
| 5208 switch (size) { | 5087 switch (size) { |
| 5209 case Neon16: { | 5088 case Neon16: { |
| 5210 uint16_t src[8]; | 5089 uint16_t src[8]; |
| 5211 get_q_register(Vm, src); | 5090 get_neon_register(Vm, src); |
| 5212 for (int i = 0; i < 8; i += 2) { | 5091 for (int i = 0; i < 8; i += 2) { |
| 5213 std::swap(src[i], src[i + 1]); | 5092 std::swap(src[i], src[i + 1]); |
| 5214 } | 5093 } |
| 5215 set_q_register(Vd, src); | 5094 set_neon_register(Vd, src); |
| 5216 break; | 5095 break; |
| 5217 } | 5096 } |
| 5218 case Neon8: { | 5097 case Neon8: { |
| 5219 uint8_t src[16]; | 5098 uint8_t src[16]; |
| 5220 get_q_register(Vm, src); | 5099 get_neon_register(Vm, src); |
| 5221 for (int i = 0; i < 4; i++) { | 5100 for (int i = 0; i < 4; i++) { |
| 5222 std::swap(src[i * 4], src[i * 4 + 3]); | 5101 std::swap(src[i * 4], src[i * 4 + 3]); |
| 5223 std::swap(src[i * 4 + 1], src[i * 4 + 2]); | 5102 std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
| 5224 } | 5103 } |
| 5225 set_q_register(Vd, src); | 5104 set_neon_register(Vd, src); |
| 5226 break; | 5105 break; |
| 5227 } | 5106 } |
| 5228 default: | 5107 default: |
| 5229 UNREACHABLE(); | 5108 UNREACHABLE(); |
| 5230 break; | 5109 break; |
| 5231 } | 5110 } |
| 5232 break; | 5111 break; |
| 5233 } | 5112 } |
| 5234 case Neon64: { | 5113 case Neon64: { |
| 5235 switch (size) { | 5114 switch (size) { |
| 5236 case Neon32: { | 5115 case Neon32: { |
| 5237 uint32_t src[4]; | 5116 uint32_t src[4]; |
| 5238 get_q_register(Vm, src); | 5117 get_neon_register(Vm, src); |
| 5239 std::swap(src[0], src[1]); | 5118 std::swap(src[0], src[1]); |
| 5240 std::swap(src[2], src[3]); | 5119 std::swap(src[2], src[3]); |
| 5241 set_q_register(Vd, src); | 5120 set_neon_register(Vd, src); |
| 5242 break; | 5121 break; |
| 5243 } | 5122 } |
| 5244 case Neon16: { | 5123 case Neon16: { |
| 5245 uint16_t src[8]; | 5124 uint16_t src[8]; |
| 5246 get_q_register(Vm, src); | 5125 get_neon_register(Vm, src); |
| 5247 for (int i = 0; i < 4; i++) { | 5126 for (int i = 0; i < 4; i++) { |
| 5248 std::swap(src[i * 4], src[i * 4 + 3]); | 5127 std::swap(src[i * 4], src[i * 4 + 3]); |
| 5249 std::swap(src[i * 4 + 1], src[i * 4 + 2]); | 5128 std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
| 5250 } | 5129 } |
| 5251 set_q_register(Vd, src); | 5130 set_neon_register(Vd, src); |
| 5252 break; | 5131 break; |
| 5253 } | 5132 } |
| 5254 case Neon8: { | 5133 case Neon8: { |
| 5255 uint8_t src[16]; | 5134 uint8_t src[16]; |
| 5256 get_q_register(Vm, src); | 5135 get_neon_register(Vm, src); |
| 5257 for (int i = 0; i < 4; i++) { | 5136 for (int i = 0; i < 4; i++) { |
| 5258 std::swap(src[i], src[7 - i]); | 5137 std::swap(src[i], src[7 - i]); |
| 5259 std::swap(src[i + 8], src[15 - i]); | 5138 std::swap(src[i + 8], src[15 - i]); |
| 5260 } | 5139 } |
| 5261 set_q_register(Vd, src); | 5140 set_neon_register(Vd, src); |
| 5262 break; | 5141 break; |
| 5263 } | 5142 } |
| 5264 default: | 5143 default: |
| 5265 UNREACHABLE(); | 5144 UNREACHABLE(); |
| 5266 break; | 5145 break; |
| 5267 } | 5146 } |
| 5268 break; | 5147 break; |
| 5269 } | 5148 } |
| 5270 default: | 5149 default: |
| 5271 UNREACHABLE(); | 5150 UNREACHABLE(); |
| 5272 break; | 5151 break; |
| 5273 } | 5152 } |
| 5274 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { | 5153 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) { |
| 5275 int Vd = instr->VFPDRegValue(kSimd128Precision); | |
| 5276 int Vm = instr->VFPMRegValue(kSimd128Precision); | |
| 5277 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5154 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 5278 // vtrn.<size> Qd, Qm. | 5155 if (instr->Bit(6) == 0) { |
| 5279 switch (size) { | 5156 int Vd = instr->VFPDRegValue(kDoublePrecision); |
| 5280 case Neon8: { | 5157 int Vm = instr->VFPMRegValue(kDoublePrecision); |
| 5281 uint8_t src[16], dst[16]; | 5158 // vtrn.<size> Dd, Dm. |
| 5282 get_q_register(Vd, dst); | 5159 switch (size) { |
| 5283 get_q_register(Vm, src); | 5160 case Neon8: |
| 5284 for (int i = 0; i < 8; i++) { | 5161 Transpose<uint8_t, kDoubleSize>(this, Vd, Vm); |
| 5285 std::swap(dst[2 * i + 1], src[2 * i]); | 5162 break; |
| 5286 } | 5163 case Neon16: |
| 5287 set_q_register(Vd, dst); | 5164 Transpose<uint16_t, kDoubleSize>(this, Vd, Vm); |
| 5288 set_q_register(Vm, src); | 5165 break; |
| 5289 break; | 5166 case Neon32: |
| 5167 Transpose<uint32_t, kDoubleSize>(this, Vd, Vm); |
| 5168 break; |
| 5169 default: |
| 5170 UNREACHABLE(); |
| 5171 break; |
| 5290 } | 5172 } |
| 5291 case Neon16: { | 5173 } else { |
| 5292 uint16_t src[8], dst[8]; | 5174 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 5293 get_q_register(Vd, dst); | 5175 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 5294 get_q_register(Vm, src); | 5176 // vtrn.<size> Qd, Qm. |
| 5295 for (int i = 0; i < 4; i++) { | 5177 switch (size) { |
| 5296 std::swap(dst[2 * i + 1], src[2 * i]); | 5178 case Neon8: |
| 5297 } | 5179 Transpose<uint8_t, kSimd128Size>(this, Vd, Vm); |
| 5298 set_q_register(Vd, dst); | 5180 break; |
| 5299 set_q_register(Vm, src); | 5181 case Neon16: |
| 5300 break; | 5182 Transpose<uint16_t, kSimd128Size>(this, Vd, Vm); |
| 5183 break; |
| 5184 case Neon32: |
| 5185 Transpose<uint32_t, kSimd128Size>(this, Vd, Vm); |
| 5186 break; |
| 5187 default: |
| 5188 UNREACHABLE(); |
| 5189 break; |
| 5301 } | 5190 } |
| 5302 case Neon32: { | |
| 5303 uint32_t src[4], dst[4]; | |
| 5304 get_q_register(Vd, dst); | |
| 5305 get_q_register(Vm, src); | |
| 5306 for (int i = 0; i < 2; i++) { | |
| 5307 std::swap(dst[2 * i + 1], src[2 * i]); | |
| 5308 } | |
| 5309 set_q_register(Vd, dst); | |
| 5310 set_q_register(Vm, src); | |
| 5311 break; | |
| 5312 } | |
| 5313 default: | |
| 5314 UNREACHABLE(); | |
| 5315 break; | |
| 5316 } | 5191 } |
| 5317 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { | 5192 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
| 5318 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5193 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 5319 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5194 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 5320 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5195 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 5321 if (instr->Bits(9, 6) == 0xd) { | 5196 if (instr->Bits(9, 6) == 0xd) { |
| 5322 // vabs<type>.<size> Qd, Qm | 5197 // vabs<type>.<size> Qd, Qm |
| 5323 if (instr->Bit(10) != 0) { | 5198 if (instr->Bit(10) != 0) { |
| 5324 // floating point (clear sign bits) | 5199 // floating point (clear sign bits) |
| 5325 uint32_t src[4]; | 5200 uint32_t src[4]; |
| 5326 get_q_register(Vm, src); | 5201 get_neon_register(Vm, src); |
| 5327 for (int i = 0; i < 4; i++) { | 5202 for (int i = 0; i < 4; i++) { |
| 5328 src[i] &= ~0x80000000; | 5203 src[i] &= ~0x80000000; |
| 5329 } | 5204 } |
| 5330 set_q_register(Vd, src); | 5205 set_neon_register(Vd, src); |
| 5331 } else { | 5206 } else { |
| 5332 // signed integer | 5207 // signed integer |
| 5333 switch (size) { | 5208 switch (size) { |
| 5334 case Neon8: { | 5209 case Neon8: |
| 5335 int8_t src[16]; | 5210 Abs<int8_t, kSimd128Size>(this, Vd, Vm); |
| 5336 get_q_register(Vm, src); | |
| 5337 for (int i = 0; i < 16; i++) { | |
| 5338 src[i] = std::abs(src[i]); | |
| 5339 } | |
| 5340 set_q_register(Vd, src); | |
| 5341 break; | 5211 break; |
| 5342 } | 5212 case Neon16: |
| 5343 case Neon16: { | 5213 Abs<int16_t, kSimd128Size>(this, Vd, Vm); |
| 5344 int16_t src[8]; | |
| 5345 get_q_register(Vm, src); | |
| 5346 for (int i = 0; i < 8; i++) { | |
| 5347 src[i] = std::abs(src[i]); | |
| 5348 } | |
| 5349 set_q_register(Vd, src); | |
| 5350 break; | 5214 break; |
| 5351 } | 5215 case Neon32: |
| 5352 case Neon32: { | 5216 Abs<int32_t, kSimd128Size>(this, Vd, Vm); |
| 5353 int32_t src[4]; | |
| 5354 get_q_register(Vm, src); | |
| 5355 for (int i = 0; i < 4; i++) { | |
| 5356 src[i] = std::abs(src[i]); | |
| 5357 } | |
| 5358 set_q_register(Vd, src); | |
| 5359 break; | 5217 break; |
| 5360 } | |
| 5361 default: | 5218 default: |
| 5362 UNIMPLEMENTED(); | 5219 UNIMPLEMENTED(); |
| 5363 break; | 5220 break; |
| 5364 } | 5221 } |
| 5365 } | 5222 } |
| 5366 } else if (instr->Bits(9, 6) == 0xf) { | 5223 } else if (instr->Bits(9, 6) == 0xf) { |
| 5367 // vneg<type>.<size> Qd, Qm (signed integer) | 5224 // vneg<type>.<size> Qd, Qm (signed integer) |
| 5368 if (instr->Bit(10) != 0) { | 5225 if (instr->Bit(10) != 0) { |
| 5369 // floating point (toggle sign bits) | 5226 // floating point (toggle sign bits) |
| 5370 uint32_t src[4]; | 5227 uint32_t src[4]; |
| 5371 get_q_register(Vm, src); | 5228 get_neon_register(Vm, src); |
| 5372 for (int i = 0; i < 4; i++) { | 5229 for (int i = 0; i < 4; i++) { |
| 5373 src[i] ^= 0x80000000; | 5230 src[i] ^= 0x80000000; |
| 5374 } | 5231 } |
| 5375 set_q_register(Vd, src); | 5232 set_neon_register(Vd, src); |
| 5376 } else { | 5233 } else { |
| 5377 // signed integer | 5234 // signed integer |
| 5378 switch (size) { | 5235 switch (size) { |
| 5379 case Neon8: { | 5236 case Neon8: |
| 5380 int8_t src[16]; | 5237 Neg<int8_t, kSimd128Size>(this, Vd, Vm); |
| 5381 get_q_register(Vm, src); | |
| 5382 for (int i = 0; i < 16; i++) { | |
| 5383 src[i] = -src[i]; | |
| 5384 } | |
| 5385 set_q_register(Vd, src); | |
| 5386 break; | 5238 break; |
| 5387 } | |
| 5388 case Neon16: | 5239 case Neon16: |
| 5389 int16_t src[8]; | 5240 Neg<int16_t, kSimd128Size>(this, Vd, Vm); |
| 5390 get_q_register(Vm, src); | |
| 5391 for (int i = 0; i < 8; i++) { | |
| 5392 src[i] = -src[i]; | |
| 5393 } | |
| 5394 set_q_register(Vd, src); | |
| 5395 break; | 5241 break; |
| 5396 case Neon32: { | 5242 case Neon32: |
| 5397 int32_t src[4]; | 5243 Neg<int32_t, kSimd128Size>(this, Vd, Vm); |
| 5398 get_q_register(Vm, src); | |
| 5399 for (int i = 0; i < 4; i++) { | |
| 5400 src[i] = -src[i]; | |
| 5401 } | |
| 5402 set_q_register(Vd, src); | |
| 5403 break; | 5244 break; |
| 5404 } | |
| 5405 default: | 5245 default: |
| 5406 UNIMPLEMENTED(); | 5246 UNIMPLEMENTED(); |
| 5407 break; | 5247 break; |
| 5408 } | 5248 } |
| 5409 } | 5249 } |
| 5410 } else { | 5250 } else { |
| 5411 UNIMPLEMENTED(); | 5251 UNIMPLEMENTED(); |
| 5412 } | 5252 } |
| 5413 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { | 5253 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { |
| 5414 // vrecpe/vrsqrte.f32 Qd, Qm. | 5254 // vrecpe/vrsqrte.f32 Qd, Qm. |
| 5415 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5255 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 5416 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5256 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 5417 uint32_t src[4]; | 5257 uint32_t src[4]; |
| 5418 get_q_register(Vm, src); | 5258 get_neon_register(Vm, src); |
| 5419 if (instr->Bit(7) == 0) { | 5259 if (instr->Bit(7) == 0) { |
| 5420 for (int i = 0; i < 4; i++) { | 5260 for (int i = 0; i < 4; i++) { |
| 5421 float denom = bit_cast<float>(src[i]); | 5261 float denom = bit_cast<float>(src[i]); |
| 5422 div_zero_vfp_flag_ = (denom == 0); | 5262 div_zero_vfp_flag_ = (denom == 0); |
| 5423 float result = 1.0f / denom; | 5263 float result = 1.0f / denom; |
| 5424 result = canonicalizeNaN(result); | 5264 result = canonicalizeNaN(result); |
| 5425 src[i] = bit_cast<uint32_t>(result); | 5265 src[i] = bit_cast<uint32_t>(result); |
| 5426 } | 5266 } |
| 5427 } else { | 5267 } else { |
| 5428 lazily_initialize_fast_sqrt(isolate_); | 5268 lazily_initialize_fast_sqrt(isolate_); |
| 5429 for (int i = 0; i < 4; i++) { | 5269 for (int i = 0; i < 4; i++) { |
| 5430 float radicand = bit_cast<float>(src[i]); | 5270 float radicand = bit_cast<float>(src[i]); |
| 5431 float result = 1.0f / fast_sqrt(radicand, isolate_); | 5271 float result = 1.0f / fast_sqrt(radicand, isolate_); |
| 5432 result = canonicalizeNaN(result); | 5272 result = canonicalizeNaN(result); |
| 5433 src[i] = bit_cast<uint32_t>(result); | 5273 src[i] = bit_cast<uint32_t>(result); |
| 5434 } | 5274 } |
| 5435 } | 5275 } |
| 5436 set_q_register(Vd, src); | 5276 set_neon_register(Vd, src); |
| 5437 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && | 5277 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && |
| 5438 instr->Bits(7, 6) != 0) { | 5278 instr->Bits(7, 6) != 0) { |
| 5439 // vqmovn.<type><size> Dd, Qm. | 5279 // vqmovn.<type><size> Dd, Qm. |
| 5440 int Vd = instr->VFPDRegValue(kDoublePrecision); | 5280 int Vd = instr->VFPDRegValue(kDoublePrecision); |
| 5441 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5281 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 5442 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5282 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 5443 bool is_unsigned = instr->Bit(6) != 0; | 5283 bool is_unsigned = instr->Bit(6) != 0; |
| 5444 switch (size) { | 5284 switch (size) { |
| 5445 case Neon8: { | 5285 case Neon8: { |
| 5446 if (is_unsigned) { | 5286 if (is_unsigned) { |
| (...skipping 27 matching lines...) Expand all Loading... |
| 5474 UNIMPLEMENTED(); | 5314 UNIMPLEMENTED(); |
| 5475 } | 5315 } |
| 5476 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { | 5316 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { |
| 5477 // vshr.u<size> Qd, Qm, shift | 5317 // vshr.u<size> Qd, Qm, shift |
| 5478 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 5318 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
| 5479 int shift = 2 * size - instr->Bits(21, 16); | 5319 int shift = 2 * size - instr->Bits(21, 16); |
| 5480 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5320 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 5481 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5321 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 5482 NeonSize ns = static_cast<NeonSize>(size / 16); | 5322 NeonSize ns = static_cast<NeonSize>(size / 16); |
| 5483 switch (ns) { | 5323 switch (ns) { |
| 5484 case Neon8: { | 5324 case Neon8: |
| 5485 uint8_t src[16]; | 5325 ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift); |
| 5486 get_q_register(Vm, src); | |
| 5487 for (int i = 0; i < 16; i++) { | |
| 5488 src[i] >>= shift; | |
| 5489 } | |
| 5490 set_q_register(Vd, src); | |
| 5491 break; | 5326 break; |
| 5492 } | 5327 case Neon16: |
| 5493 case Neon16: { | 5328 ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift); |
| 5494 uint16_t src[8]; | |
| 5495 get_q_register(Vm, src); | |
| 5496 for (int i = 0; i < 8; i++) { | |
| 5497 src[i] >>= shift; | |
| 5498 } | |
| 5499 set_q_register(Vd, src); | |
| 5500 break; | 5329 break; |
| 5501 } | 5330 case Neon32: |
| 5502 case Neon32: { | 5331 ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift); |
| 5503 uint32_t src[4]; | |
| 5504 get_q_register(Vm, src); | |
| 5505 for (int i = 0; i < 4; i++) { | |
| 5506 src[i] >>= shift; | |
| 5507 } | |
| 5508 set_q_register(Vd, src); | |
| 5509 break; | 5332 break; |
| 5510 } | |
| 5511 default: | 5333 default: |
| 5512 UNREACHABLE(); | 5334 UNREACHABLE(); |
| 5513 break; | 5335 break; |
| 5514 } | 5336 } |
| 5515 } else { | 5337 } else { |
| 5516 UNIMPLEMENTED(); | 5338 UNIMPLEMENTED(); |
| 5517 } | 5339 } |
| 5518 break; | 5340 break; |
| 5519 case 8: | 5341 case 8: |
| 5520 if (instr->Bits(21, 20) == 0) { | 5342 if (instr->Bits(21, 20) == 0) { |
| (...skipping 701 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6222 processor->prev_ = nullptr; | 6044 processor->prev_ = nullptr; |
| 6223 processor->next_ = nullptr; | 6045 processor->next_ = nullptr; |
| 6224 } | 6046 } |
| 6225 | 6047 |
| 6226 } // namespace internal | 6048 } // namespace internal |
| 6227 } // namespace v8 | 6049 } // namespace v8 |
| 6228 | 6050 |
| 6229 #endif // USE_SIMULATOR | 6051 #endif // USE_SIMULATOR |
| 6230 | 6052 |
| 6231 #endif // V8_TARGET_ARCH_ARM | 6053 #endif // V8_TARGET_ARCH_ARM |
| OLD | NEW |