OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <stdarg.h> | 5 #include <stdarg.h> |
6 #include <stdlib.h> | 6 #include <stdlib.h> |
7 #include <cmath> | 7 #include <cmath> |
8 | 8 |
9 #if V8_TARGET_ARCH_ARM | 9 #if V8_TARGET_ARCH_ARM |
10 | 10 |
(...skipping 878 matching lines...)
889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); | 889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); |
890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); | 890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); |
891 } | 891 } |
892 | 892 |
893 | 893 |
894 void Simulator::set_d_register(int dreg, const uint32_t* value) { | 894 void Simulator::set_d_register(int dreg, const uint32_t* value) { |
895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); | 895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); |
896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); | 896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); |
897 } | 897 } |
898 | 898 |
899 template <typename T> | 899 template <typename T, int SIZE> |
900 void Simulator::get_d_register(int dreg, T* value) { | 900 void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) { |
901 DCHECK((dreg >= 0) && (dreg < num_d_registers)); | 901 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); |
902 memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize); | 902 DCHECK_LE(0, reg); |
| 903 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); |
| 904 memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE); |
903 } | 905 } |
904 | 906 |
905 template <typename T> | 907 template <typename T, int SIZE> |
906 void Simulator::set_d_register(int dreg, const T* value) { | 908 void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) { |
907 DCHECK((dreg >= 0) && (dreg < num_d_registers)); | 909 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); |
908 memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize); | 910 DCHECK_LE(0, reg); |
909 } | 911 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); |
910 | 912 memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE); |
911 template <typename T> | |
912 void Simulator::get_q_register(int qreg, T* value) { | |
913 DCHECK((qreg >= 0) && (qreg < num_q_registers)); | |
914 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size); | |
915 } | |
916 | |
917 template <typename T> | |
918 void Simulator::set_q_register(int qreg, const T* value) { | |
919 DCHECK((qreg >= 0) && (qreg < num_q_registers)); | |
920 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size); | |
921 } | 913 } |
922 | 914 |
923 // Raw access to the PC register. | 915 // Raw access to the PC register. |
924 void Simulator::set_pc(int32_t value) { | 916 void Simulator::set_pc(int32_t value) { |
925 pc_modified_ = true; | 917 pc_modified_ = true; |
926 registers_[pc] = value; | 918 registers_[pc] = value; |
927 } | 919 } |
928 | 920 |
929 | 921 |
930 bool Simulator::has_bad_pc() const { | 922 bool Simulator::has_bad_pc() const { |
(...skipping 2570 matching lines...)
3501 case Neon32: { | 3493 case Neon32: { |
3502 for (int i = 0; i < 4; i++) { | 3494 for (int i = 0; i < 4; i++) { |
3503 q_data[i] = rt_value; | 3495 q_data[i] = rt_value; |
3504 } | 3496 } |
3505 break; | 3497 break; |
3506 } | 3498 } |
3507 default: | 3499 default: |
3508 UNREACHABLE(); | 3500 UNREACHABLE(); |
3509 break; | 3501 break; |
3510 } | 3502 } |
3511 set_q_register(vd, q_data); | 3503 set_neon_register(vd, q_data); |
3512 } | 3504 } |
3513 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { | 3505 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { |
3514 // vmov (scalar to ARM core register) | 3506 // vmov (scalar to ARM core register) |
3515 int vn = instr->VFPNRegValue(kDoublePrecision); | 3507 int vn = instr->VFPNRegValue(kDoublePrecision); |
3516 int rt = instr->RtValue(); | 3508 int rt = instr->RtValue(); |
3517 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); | 3509 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); |
3518 uint64_t data; | 3510 uint64_t data; |
3519 get_d_register(vn, &data); | 3511 get_d_register(vn, &data); |
3520 if ((opc1_opc2 & 0xb) == 0) { | 3512 if ((opc1_opc2 & 0xb) == 0) { |
3521 // NeonS32 / NeonU32 | 3513 // NeonS32 / NeonU32 |
(...skipping 466 matching lines...)
3988 break; | 3980 break; |
3989 default: | 3981 default: |
3990 UNIMPLEMENTED(); // Not used by V8. | 3982 UNIMPLEMENTED(); // Not used by V8. |
3991 } | 3983 } |
3992 } else { | 3984 } else { |
3993 UNIMPLEMENTED(); // Not used by V8. | 3985 UNIMPLEMENTED(); // Not used by V8. |
3994 } | 3986 } |
3995 } | 3987 } |
3996 | 3988 |
3997 // Templated operations for NEON instructions. | 3989 // Templated operations for NEON instructions. |
3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition. | |
3999 template <typename T, typename U> | 3990 template <typename T, typename U> |
4000 U Widen(T value) { | 3991 U Widen(T value) { |
4001 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); | 3992 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
4002 static_assert(sizeof(U) > sizeof(T), "T must be smaller than U"); | 3993 static_assert(sizeof(U) > sizeof(T), "T must be smaller than U"); |
4003 return static_cast<U>(value); | 3994 return static_cast<U>(value); |
4004 } | 3995 } |
4005 | 3996 |
4006 template <typename T, typename U> | 3997 template <typename T, typename U> |
4007 U Narrow(T value) { | 3998 U Narrow(T value) { |
4008 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); | 3999 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); |
4009 static_assert(sizeof(U) < sizeof(T), "T must be larger than U"); | 4000 static_assert(sizeof(U) < sizeof(T), "T must be larger than U"); |
4010 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), | 4001 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), |
4011 "Signed-ness of T and U must match"); | 4002 "Signed-ness of T and U must match"); |
4012 // Make sure value can be expressed in the smaller type; otherwise, the | 4003 // Make sure value can be expressed in the smaller type; otherwise, the |
4013 // casted result is implementation defined. | 4004 // casted result is implementation defined. |
4014 DCHECK_LE(std::numeric_limits<T>::min(), value); | 4005 DCHECK_LE(std::numeric_limits<T>::min(), value); |
4015 DCHECK_GE(std::numeric_limits<T>::max(), value); | 4006 DCHECK_GE(std::numeric_limits<T>::max(), value); |
4016 return static_cast<U>(value); | 4007 return static_cast<U>(value); |
4017 } | 4008 } |
4018 | 4009 |
4019 template <typename T> | 4010 template <typename T> |
4020 T Clamp(int64_t value) { | 4011 T Clamp(int64_t value) { |
4021 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); | 4012 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
4022 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); | 4013 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); |
4023 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); | 4014 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); |
4024 int64_t clamped = std::max(min, std::min(max, value)); | 4015 int64_t clamped = std::max(min, std::min(max, value)); |
4025 return static_cast<T>(clamped); | 4016 return static_cast<T>(clamped); |
4026 } | 4017 } |
4027 | 4018 |
4028 template <typename T> | |
4029 T MinMax(T a, T b, bool is_min) { | |
4030 return is_min ? std::min(a, b) : std::max(a, b); | |
4031 } | |
4032 | |
4033 template <typename T, typename U> | 4019 template <typename T, typename U> |
4034 void Widen(Simulator* simulator, int Vd, int Vm) { | 4020 void Widen(Simulator* simulator, int Vd, int Vm) { |
4035 static const int kLanes = 8 / sizeof(T); | 4021 static const int kLanes = 8 / sizeof(T); |
4036 T src[kLanes]; | 4022 T src[kLanes]; |
4037 U dst[kLanes]; | 4023 U dst[kLanes]; |
4038 simulator->get_d_register(Vm, src); | 4024 simulator->get_neon_register<T, kDoubleSize>(Vm, src); |
4039 for (int i = 0; i < kLanes; i++) { | 4025 for (int i = 0; i < kLanes; i++) { |
4040 dst[i] = Widen<T, U>(src[i]); | 4026 dst[i] = Widen<T, U>(src[i]); |
4041 } | 4027 } |
4042 simulator->set_q_register(Vd, dst); | 4028 simulator->set_neon_register(Vd, dst); |
| 4029 } |
| 4030 |
| 4031 template <typename T, int SIZE> |
| 4032 void Abs(Simulator* simulator, int Vd, int Vm) { |
| 4033 static const int kElems = SIZE / sizeof(T); |
| 4034 T src[kElems]; |
| 4035 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4036 for (int i = 0; i < kElems; i++) { |
| 4037 src[i] = std::abs(src[i]); |
| 4038 } |
| 4039 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4040 } |
| 4041 |
| 4042 template <typename T, int SIZE> |
| 4043 void Neg(Simulator* simulator, int Vd, int Vm) { |
| 4044 static const int kElems = SIZE / sizeof(T); |
| 4045 T src[kElems]; |
| 4046 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4047 for (int i = 0; i < kElems; i++) { |
| 4048 src[i] = -src[i]; |
| 4049 } |
| 4050 simulator->set_neon_register<T, SIZE>(Vd, src); |
4043 } | 4051 } |
4044 | 4052 |
4045 template <typename T, typename U> | 4053 template <typename T, typename U> |
4046 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { | 4054 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { |
4047 static const int kLanes = 16 / sizeof(T); | 4055 static const int kLanes = 16 / sizeof(T); |
4048 T src[kLanes]; | 4056 T src[kLanes]; |
4049 U dst[kLanes]; | 4057 U dst[kLanes]; |
4050 simulator->get_q_register(Vm, src); | 4058 simulator->get_neon_register(Vm, src); |
4051 for (int i = 0; i < kLanes; i++) { | 4059 for (int i = 0; i < kLanes; i++) { |
4052 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); | 4060 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); |
4053 } | 4061 } |
4054 simulator->set_d_register(Vd, dst); | 4062 simulator->set_neon_register<U, kDoubleSize>(Vd, dst); |
4055 } | 4063 } |
4056 | 4064 |
4057 template <typename T> | 4065 template <typename T> |
4058 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { | 4066 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
4059 static const int kLanes = 16 / sizeof(T); | 4067 static const int kLanes = 16 / sizeof(T); |
4060 T src1[kLanes], src2[kLanes]; | 4068 T src1[kLanes], src2[kLanes]; |
4061 simulator->get_q_register(Vn, src1); | 4069 simulator->get_neon_register(Vn, src1); |
4062 simulator->get_q_register(Vm, src2); | 4070 simulator->get_neon_register(Vm, src2); |
4063 for (int i = 0; i < kLanes; i++) { | 4071 for (int i = 0; i < kLanes; i++) { |
4064 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); | 4072 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); |
4065 } | 4073 } |
4066 simulator->set_q_register(Vd, src1); | 4074 simulator->set_neon_register(Vd, src1); |
4067 } | 4075 } |
4068 | 4076 |
4069 template <typename T> | 4077 template <typename T> |
4070 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { | 4078 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
4071 static const int kLanes = 16 / sizeof(T); | 4079 static const int kLanes = 16 / sizeof(T); |
4072 T src1[kLanes], src2[kLanes]; | 4080 T src1[kLanes], src2[kLanes]; |
4073 simulator->get_q_register(Vn, src1); | 4081 simulator->get_neon_register(Vn, src1); |
4074 simulator->get_q_register(Vm, src2); | 4082 simulator->get_neon_register(Vm, src2); |
4075 for (int i = 0; i < kLanes; i++) { | 4083 for (int i = 0; i < kLanes; i++) { |
4076 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); | 4084 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); |
4077 } | 4085 } |
4078 simulator->set_q_register(Vd, src1); | 4086 simulator->set_neon_register(Vd, src1); |
| 4087 } |
| 4088 |
| 4089 template <typename T, int SIZE> |
| 4090 void Zip(Simulator* simulator, int Vd, int Vm) { |
| 4091 static const int kElems = SIZE / sizeof(T); |
| 4092 static const int kPairs = kElems / 2; |
| 4093 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; |
| 4094 simulator->get_neon_register<T, SIZE>(Vd, src1); |
| 4095 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4096 for (int i = 0; i < kPairs; i++) { |
| 4097 dst1[i * 2] = src1[i]; |
| 4098 dst1[i * 2 + 1] = src2[i]; |
| 4099 dst2[i * 2] = src1[i + kPairs]; |
| 4100 dst2[i * 2 + 1] = src2[i + kPairs]; |
| 4101 } |
| 4102 simulator->set_neon_register<T, SIZE>(Vd, dst1); |
| 4103 simulator->set_neon_register<T, SIZE>(Vm, dst2); |
| 4104 } |
| 4105 |
| 4106 template <typename T, int SIZE> |
| 4107 void Unzip(Simulator* simulator, int Vd, int Vm) { |
| 4108 static const int kElems = SIZE / sizeof(T); |
| 4109 static const int kPairs = kElems / 2; |
| 4110 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; |
| 4111 simulator->get_neon_register<T, SIZE>(Vd, src1); |
| 4112 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4113 for (int i = 0; i < kPairs; i++) { |
| 4114 dst1[i] = src1[i * 2]; |
| 4115 dst1[i + kPairs] = src2[i * 2]; |
| 4116 dst2[i] = src1[i * 2 + 1]; |
| 4117 dst2[i + kPairs] = src2[i * 2 + 1]; |
| 4118 } |
| 4119 simulator->set_neon_register<T, SIZE>(Vd, dst1); |
| 4120 simulator->set_neon_register<T, SIZE>(Vm, dst2); |
| 4121 } |
| 4122 |
| 4123 template <typename T, int SIZE> |
| 4124 void Transpose(Simulator* simulator, int Vd, int Vm) { |
| 4125 static const int kElems = SIZE / sizeof(T); |
| 4126 static const int kPairs = kElems / 2; |
| 4127 T src1[kElems], src2[kElems]; |
| 4128 simulator->get_neon_register<T, SIZE>(Vd, src1); |
| 4129 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4130 for (int i = 0; i < kPairs; i++) { |
| 4131 std::swap(src1[2 * i + 1], src2[2 * i]); |
| 4132 } |
| 4133 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4134 simulator->set_neon_register<T, SIZE>(Vm, src2); |
| 4135 } |
| 4136 |
| 4137 template <typename T, int SIZE> |
| 4138 void Test(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4139 static const int kElems = SIZE / sizeof(T); |
| 4140 T src1[kElems], src2[kElems]; |
| 4141 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4142 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4143 for (int i = 0; i < kElems; i++) { |
| 4144 src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0; |
| 4145 } |
| 4146 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4147 } |
| 4148 |
| 4149 template <typename T, int SIZE> |
| 4150 void Add(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4151 static const int kElems = SIZE / sizeof(T); |
| 4152 T src1[kElems], src2[kElems]; |
| 4153 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4154 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4155 for (int i = 0; i < kElems; i++) { |
| 4156 src1[i] += src2[i]; |
| 4157 } |
| 4158 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4159 } |
| 4160 |
| 4161 template <typename T, int SIZE> |
| 4162 void Sub(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4163 static const int kElems = SIZE / sizeof(T); |
| 4164 T src1[kElems], src2[kElems]; |
| 4165 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4166 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4167 for (int i = 0; i < kElems; i++) { |
| 4168 src1[i] -= src2[i]; |
| 4169 } |
| 4170 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4171 } |
| 4172 |
| 4173 template <typename T, int SIZE> |
| 4174 void Mul(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4175 static const int kElems = SIZE / sizeof(T); |
| 4176 T src1[kElems], src2[kElems]; |
| 4177 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4178 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4179 for (int i = 0; i < kElems; i++) { |
| 4180 src1[i] *= src2[i]; |
| 4181 } |
| 4182 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4183 } |
| 4184 |
| 4185 template <typename T, int SIZE> |
| 4186 void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) { |
| 4187 static const int kElems = SIZE / sizeof(T); |
| 4188 T src[kElems]; |
| 4189 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4190 for (int i = 0; i < kElems; i++) { |
| 4191 src[i] <<= shift; |
| 4192 } |
| 4193 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4194 } |
| 4195 |
| 4196 template <typename T, int SIZE> |
| 4197 void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { |
| 4198 static const int kElems = SIZE / sizeof(T); |
| 4199 T src[kElems]; |
| 4200 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4201 for (int i = 0; i < kElems; i++) { |
| 4202 src[i] >>= shift; |
| 4203 } |
| 4204 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4205 } |
| 4206 |
| 4207 template <typename T, int SIZE> |
| 4208 void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { |
| 4209 static const int kElems = SIZE / sizeof(T); |
| 4210 T src[kElems]; |
| 4211 simulator->get_neon_register<T, SIZE>(Vm, src); |
| 4212 for (int i = 0; i < kElems; i++) { |
| 4213 src[i] = ArithmeticShiftRight(src[i], shift); |
| 4214 } |
| 4215 simulator->set_neon_register<T, SIZE>(Vd, src); |
| 4216 } |
| 4217 |
| 4218 template <typename T, int SIZE> |
| 4219 void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) { |
| 4220 static const int kElems = SIZE / sizeof(T); |
| 4221 T src1[kElems], src2[kElems]; |
| 4222 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4223 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4224 for (int i = 0; i < kElems; i++) { |
| 4225 src1[i] = src1[i] == src2[i] ? -1 : 0; |
| 4226 } |
| 4227 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4228 } |
| 4229 |
| 4230 template <typename T, int SIZE> |
| 4231 void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) { |
| 4232 static const int kElems = SIZE / sizeof(T); |
| 4233 T src1[kElems], src2[kElems]; |
| 4234 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4235 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4236 for (int i = 0; i < kElems; i++) { |
| 4237 if (ge) |
| 4238 src1[i] = src1[i] >= src2[i] ? -1 : 0; |
| 4239 else |
| 4240 src1[i] = src1[i] > src2[i] ? -1 : 0; |
| 4241 } |
| 4242 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4243 } |
| 4244 |
| 4245 template <typename T> |
| 4246 T MinMax(T a, T b, bool is_min) { |
| 4247 return is_min ? std::min(a, b) : std::max(a, b); |
| 4248 } |
| 4249 |
| 4250 template <typename T, int SIZE> |
| 4251 void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { |
| 4252 static const int kElems = SIZE / sizeof(T); |
| 4253 T src1[kElems], src2[kElems]; |
| 4254 simulator->get_neon_register<T, SIZE>(Vn, src1); |
| 4255 simulator->get_neon_register<T, SIZE>(Vm, src2); |
| 4256 for (int i = 0; i < kElems; i++) { |
| 4257 src1[i] = MinMax(src1[i], src2[i], min); |
| 4258 } |
| 4259 simulator->set_neon_register<T, SIZE>(Vd, src1); |
| 4260 } |
| 4261 |
| 4262 template <typename T> |
| 4263 void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { |
| 4264 static const int kElems = kDoubleSize / sizeof(T); |
| 4265 static const int kPairs = kElems / 2; |
| 4266 T dst[kElems], src1[kElems], src2[kElems]; |
| 4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1); |
| 4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2); |
| 4269 for (int i = 0; i < kPairs; i++) { |
| 4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
| 4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
| 4272 } |
| 4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst); |
4079 } | 4274 } |
4080 | 4275 |
4081 void Simulator::DecodeSpecialCondition(Instruction* instr) { | 4276 void Simulator::DecodeSpecialCondition(Instruction* instr) { |
4082 switch (instr->SpecialValue()) { | 4277 switch (instr->SpecialValue()) { |
4083 case 4: { | 4278 case 4: { |
4084 int Vd, Vm, Vn; | 4279 int Vd, Vm, Vn; |
4085 if (instr->Bit(6) == 0) { | 4280 if (instr->Bit(6) == 0) { |
4086 Vd = instr->VFPDRegValue(kDoublePrecision); | 4281 Vd = instr->VFPDRegValue(kDoublePrecision); |
4087 Vm = instr->VFPMRegValue(kDoublePrecision); | 4282 Vm = instr->VFPMRegValue(kDoublePrecision); |
4088 Vn = instr->VFPNRegValue(kDoublePrecision); | 4283 Vn = instr->VFPNRegValue(kDoublePrecision); |
(...skipping 25 matching lines...)
4114 UNIMPLEMENTED(); | 4309 UNIMPLEMENTED(); |
4115 } | 4310 } |
4116 break; | 4311 break; |
4117 } | 4312 } |
4118 case 0x1: { | 4313 case 0x1: { |
4119 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && | 4314 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && |
4120 instr->Bit(4) == 1) { | 4315 instr->Bit(4) == 1) { |
4121 // vmov Qd, Qm. | 4316 // vmov Qd, Qm. |
4122 // vorr, Qd, Qm, Qn. | 4317 // vorr, Qd, Qm, Qn. |
4123 uint32_t src1[4]; | 4318 uint32_t src1[4]; |
4124 get_q_register(Vm, src1); | 4319 get_neon_register(Vm, src1); |
4125 if (Vm != Vn) { | 4320 if (Vm != Vn) { |
4126 uint32_t src2[4]; | 4321 uint32_t src2[4]; |
4127 get_q_register(Vn, src2); | 4322 get_neon_register(Vn, src2); |
4128 for (int i = 0; i < 4; i++) { | 4323 for (int i = 0; i < 4; i++) { |
4129 src1[i] = src1[i] | src2[i]; | 4324 src1[i] = src1[i] | src2[i]; |
4130 } | 4325 } |
4131 } | 4326 } |
4132 set_q_register(Vd, src1); | 4327 set_neon_register(Vd, src1); |
4133 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && | 4328 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && |
4134 instr->Bit(4) == 1) { | 4329 instr->Bit(4) == 1) { |
4135 // vand Qd, Qm, Qn. | 4330 // vand Qd, Qm, Qn. |
4136 uint32_t src1[4], src2[4]; | 4331 uint32_t src1[4], src2[4]; |
4137 get_q_register(Vn, src1); | 4332 get_neon_register(Vn, src1); |
4138 get_q_register(Vm, src2); | 4333 get_neon_register(Vm, src2); |
4139 for (int i = 0; i < 4; i++) { | 4334 for (int i = 0; i < 4; i++) { |
4140 src1[i] = src1[i] & src2[i]; | 4335 src1[i] = src1[i] & src2[i]; |
4141 } | 4336 } |
4142 set_q_register(Vd, src1); | 4337 set_neon_register(Vd, src1); |
4143 } else { | 4338 } else { |
4144 UNIMPLEMENTED(); | 4339 UNIMPLEMENTED(); |
4145 } | 4340 } |
4146 break; | 4341 break; |
4147 } | 4342 } |
4148 case 0x2: { | 4343 case 0x2: { |
4149 if (instr->Bit(4) == 1) { | 4344 if (instr->Bit(4) == 1) { |
4150 // vqsub.s<size> Qd, Qm, Qn. | 4345 // vqsub.s<size> Qd, Qm, Qn. |
4151 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4346 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4152 switch (size) { | 4347 switch (size) { |
(...skipping 13 matching lines...)
4166 } else { | 4361 } else { |
4167 UNIMPLEMENTED(); | 4362 UNIMPLEMENTED(); |
4168 } | 4363 } |
4169 break; | 4364 break; |
4170 } | 4365 } |
4171 case 0x3: { | 4366 case 0x3: { |
4172 // vcge/vcgt.s<size> Qd, Qm, Qn. | 4367 // vcge/vcgt.s<size> Qd, Qm, Qn. |
4173 bool ge = instr->Bit(4) == 1; | 4368 bool ge = instr->Bit(4) == 1; |
4174 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4369 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4175 switch (size) { | 4370 switch (size) { |
4176 case Neon8: { | 4371 case Neon8: |
4177 int8_t src1[16], src2[16]; | 4372 CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
4178 get_q_register(Vn, src1); | |
4179 get_q_register(Vm, src2); | |
4180 for (int i = 0; i < 16; i++) { | |
4181 if (ge) | |
4182 src1[i] = src1[i] >= src2[i] ? 0xFF : 0; | |
4183 else | |
4184 src1[i] = src1[i] > src2[i] ? 0xFF : 0; | |
4185 } | |
4186 set_q_register(Vd, src1); | |
4187 break; | 4373 break; |
4188 } | 4374 case Neon16: |
4189 case Neon16: { | 4375 CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
4190 int16_t src1[8], src2[8]; | |
4191 get_q_register(Vn, src1); | |
4192 get_q_register(Vm, src2); | |
4193 for (int i = 0; i < 8; i++) { | |
4194 if (ge) | |
4195 src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; | |
4196 else | |
4197 src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; | |
4198 } | |
4199 set_q_register(Vd, src1); | |
4200 break; | 4376 break; |
4201 } | 4377 case Neon32: |
4202 case Neon32: { | 4378 CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
4203 int32_t src1[4], src2[4]; | |
4204 get_q_register(Vn, src1); | |
4205 get_q_register(Vm, src2); | |
4206 for (int i = 0; i < 4; i++) { | |
4207 if (ge) | |
4208 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; | |
4209 else | |
4210 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; | |
4211 } | |
4212 set_q_register(Vd, src1); | |
4213 break; | 4379 break; |
4214 } | |
4215 default: | 4380 default: |
4216 UNREACHABLE(); | 4381 UNREACHABLE(); |
4217 break; | 4382 break; |
4218 } | 4383 } |
4219 break; | 4384 break; |
4220 } | 4385 } |
4221 case 0x6: { | 4386 case 0x6: { |
4222 // vmin/vmax.s<size> Qd, Qm, Qn. | 4387 // vmin/vmax.s<size> Qd, Qm, Qn. |
4223 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4224 bool min = instr->Bit(4) != 0; | 4389 bool min = instr->Bit(4) != 0; |
4225 switch (size) { | 4390 switch (size) { |
4226 case Neon8: { | 4391 case Neon8: |
4227 int8_t src1[16], src2[16]; | 4392 MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
4228 get_q_register(Vn, src1); | |
4229 get_q_register(Vm, src2); | |
4230 for (int i = 0; i < 16; i++) { | |
4231 src1[i] = MinMax(src1[i], src2[i], min); | |
4232 } | |
4233 set_q_register(Vd, src1); | |
4234 break; | 4393 break; |
4235 } | 4394 case Neon16: |
4236 case Neon16: { | 4395 MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
4237 int16_t src1[8], src2[8]; | |
4238 get_q_register(Vn, src1); | |
4239 get_q_register(Vm, src2); | |
4240 for (int i = 0; i < 8; i++) { | |
4241 src1[i] = MinMax(src1[i], src2[i], min); | |
4242 } | |
4243 set_q_register(Vd, src1); | |
4244 break; | 4396 break; |
4245 } | 4397 case Neon32: |
4246 case Neon32: { | 4398 MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
4247 int32_t src1[4], src2[4]; | |
4248 get_q_register(Vn, src1); | |
4249 get_q_register(Vm, src2); | |
4250 for (int i = 0; i < 4; i++) { | |
4251 src1[i] = MinMax(src1[i], src2[i], min); | |
4252 } | |
4253 set_q_register(Vd, src1); | |
4254 break; | 4399 break; |
4255 } | |
4256 default: | 4400 default: |
4257 UNREACHABLE(); | 4401 UNREACHABLE(); |
4258 break; | 4402 break; |
4259 } | 4403 } |
4260 break; | 4404 break; |
4261 } | 4405 } |
4262 case 0x8: { | 4406 case 0x8: { |
4263 // vadd/vtst | 4407 // vadd/vtst |
4264 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4408 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4265 if (instr->Bit(4) == 0) { | 4409 if (instr->Bit(4) == 0) { |
4266 // vadd.i<size> Qd, Qm, Qn. | 4410 // vadd.i<size> Qd, Qm, Qn. |
4267 switch (size) { | 4411 switch (size) { |
4268 case Neon8: { | 4412 case Neon8: |
4269 uint8_t src1[16], src2[16]; | 4413 Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
4270 get_q_register(Vn, src1); | |
4271 get_q_register(Vm, src2); | |
4272 for (int i = 0; i < 16; i++) { | |
4273 src1[i] += src2[i]; | |
4274 } | |
4275 set_q_register(Vd, src1); | |
4276 break; | 4414 break; |
4277 } | 4415 case Neon16: |
4278 case Neon16: { | 4416 Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
4279 uint16_t src1[8], src2[8]; | |
4280 get_q_register(Vn, src1); | |
4281 get_q_register(Vm, src2); | |
4282 for (int i = 0; i < 8; i++) { | |
4283 src1[i] += src2[i]; | |
4284 } | |
4285 set_q_register(Vd, src1); | |
4286 break; | 4417 break; |
4287 } | 4418 case Neon32: |
4288 case Neon32: { | 4419 Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
4289 uint32_t src1[4], src2[4]; | |
4290 get_q_register(Vn, src1); | |
4291 get_q_register(Vm, src2); | |
4292 for (int i = 0; i < 4; i++) { | |
4293 src1[i] += src2[i]; | |
4294 } | |
4295 set_q_register(Vd, src1); | |
4296 break; | 4420 break; |
4297 } | |
4298 default: | 4421 default: |
4299 UNREACHABLE(); | 4422 UNREACHABLE(); |
4300 break; | 4423 break; |
4301 } | 4424 } |
4302 } else { | 4425 } else { |
4303 // vtst.i<size> Qd, Qm, Qn. | 4426 // vtst.i<size> Qd, Qm, Qn. |
4304 switch (size) { | 4427 switch (size) { |
4305 case Neon8: { | 4428 case Neon8: |
4306 uint8_t src1[16], src2[16]; | 4429 Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
4307 get_q_register(Vn, src1); | |
4308 get_q_register(Vm, src2); | |
4309 for (int i = 0; i < 16; i++) { | |
4310 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; | |
4311 } | |
4312 set_q_register(Vd, src1); | |
4313 break; | 4430 break; |
4314 } | 4431 case Neon16: |
4315 case Neon16: { | 4432 Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
4316 uint16_t src1[8], src2[8]; | |
4317 get_q_register(Vn, src1); | |
4318 get_q_register(Vm, src2); | |
4319 for (int i = 0; i < 8; i++) { | |
4320 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; | |
4321 } | |
4322 set_q_register(Vd, src1); | |
4323 break; | 4433 break; |
4324 } | 4434 case Neon32: |
4325 case Neon32: { | 4435 Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
4326 uint32_t src1[4], src2[4]; | |
4327 get_q_register(Vn, src1); | |
4328 get_q_register(Vm, src2); | |
4329 for (int i = 0; i < 4; i++) { | |
4330 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; | |
4331 } | |
4332 set_q_register(Vd, src1); | |
4333 break; | 4436 break; |
4334 } | |
4335 default: | 4437 default: |
4336 UNREACHABLE(); | 4438 UNREACHABLE(); |
4337 break; | 4439 break; |
4338 } | 4440 } |
4339 } | 4441 } |
4340 break; | 4442 break; |
4341 } | 4443 } |
4342 case 0x9: { | 4444 case 0x9: { |
4343 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { | 4445 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
4344 // vmul.i<size> Qd, Qm, Qn. | 4446 // vmul.i<size> Qd, Qm, Qn. |
4345 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4447 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4346 switch (size) { | 4448 switch (size) { |
4347 case Neon8: { | 4449 case Neon8: |
4348 uint8_t src1[16], src2[16]; | 4450 Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
4349 get_q_register(Vn, src1); | |
4350 get_q_register(Vm, src2); | |
4351 for (int i = 0; i < 16; i++) { | |
4352 src1[i] *= src2[i]; | |
4353 } | |
4354 set_q_register(Vd, src1); | |
4355 break; | 4451 break; |
4356 } | 4452 case Neon16: |
4357 case Neon16: { | 4453 Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
4358 uint16_t src1[8], src2[8]; | |
4359 get_q_register(Vn, src1); | |
4360 get_q_register(Vm, src2); | |
4361 for (int i = 0; i < 8; i++) { | |
4362 src1[i] *= src2[i]; | |
4363 } | |
4364 set_q_register(Vd, src1); | |
4365 break; | 4454 break; |
4366 } | 4455 case Neon32: |
4367 case Neon32: { | 4456 Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
4368 uint32_t src1[4], src2[4]; | |
4369 get_q_register(Vn, src1); | |
4370 get_q_register(Vm, src2); | |
4371 for (int i = 0; i < 4; i++) { | |
4372 src1[i] *= src2[i]; | |
4373 } | |
4374 set_q_register(Vd, src1); | |
4375 break; | 4457 break; |
4376 } | |
4377 default: | 4458 default: |
4378 UNREACHABLE(); | 4459 UNREACHABLE(); |
4379 break; | 4460 break; |
4380 } | 4461 } |
4381 } else { | 4462 } else { |
4382 UNIMPLEMENTED(); | 4463 UNIMPLEMENTED(); |
4383 } | 4464 } |
4384 break; | 4465 break; |
4385 } | 4466 } |
4386 case 0xa: { | 4467 case 0xa: { |
4387 // vpmin/vpmax.s<size> Dd, Dm, Dn. | 4468 // vpmin/vpmax.s<size> Dd, Dm, Dn. |
4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4469 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4389 bool min = instr->Bit(4) != 0; | 4470 bool min = instr->Bit(4) != 0; |
4390 switch (size) { | 4471 switch (size) { |
4391 case Neon8: { | 4472 case Neon8: |
4392 int8_t dst[8], src1[8], src2[8]; | 4473 PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min); |
4393 get_d_register(Vn, src1); | |
4394 get_d_register(Vm, src2); | |
4395 for (int i = 0; i < 4; i++) { | |
4396 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
4397 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
4398 } | |
4399 set_d_register(Vd, dst); | |
4400 break; | 4474 break; |
4401 } | 4475 case Neon16: |
4402 case Neon16: { | 4476 PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min); |
4403 int16_t dst[4], src1[4], src2[4]; | |
4404 get_d_register(Vn, src1); | |
4405 get_d_register(Vm, src2); | |
4406 for (int i = 0; i < 2; i++) { | |
4407 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
4408 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
4409 } | |
4410 set_d_register(Vd, dst); | |
4411 break; | 4477 break; |
4412 } | 4478 case Neon32: |
4413 case Neon32: { | 4479 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min); |
4414 int32_t dst[2], src1[2], src2[2]; | |
4415 get_d_register(Vn, src1); | |
4416 get_d_register(Vm, src2); | |
4417 dst[0] = MinMax(src1[0], src1[1], min); | |
4418 dst[1] = MinMax(src2[0], src2[1], min); | |
4419 set_d_register(Vd, dst); | |
4420 break; | 4480 break; |
4421 } | |
4422 default: | 4481 default: |
4423 UNREACHABLE(); | 4482 UNREACHABLE(); |
4424 break; | 4483 break; |
4425 } | 4484 } |
4426 break; | 4485 break; |
4427 } | 4486 } |
4428 case 0xd: { | 4487 case 0xd: { |
4429 if (instr->Bit(4) == 0) { | 4488 if (instr->Bit(4) == 0) { |
4430 float src1[4], src2[4]; | 4489 float src1[4], src2[4]; |
4431 get_q_register(Vn, src1); | 4490 get_neon_register(Vn, src1); |
4432 get_q_register(Vm, src2); | 4491 get_neon_register(Vm, src2); |
4433 for (int i = 0; i < 4; i++) { | 4492 for (int i = 0; i < 4; i++) { |
4434 if (instr->Bit(21) == 0) { | 4493 if (instr->Bit(21) == 0) { |
4435 // vadd.f32 Qd, Qm, Qn. | 4494 // vadd.f32 Qd, Qm, Qn. |
4436 src1[i] = src1[i] + src2[i]; | 4495 src1[i] = src1[i] + src2[i]; |
4437 } else { | 4496 } else { |
4438 // vsub.f32 Qd, Qm, Qn. | 4497 // vsub.f32 Qd, Qm, Qn. |
4439 src1[i] = src1[i] - src2[i]; | 4498 src1[i] = src1[i] - src2[i]; |
4440 } | 4499 } |
4441 } | 4500 } |
4442 set_q_register(Vd, src1); | 4501 set_neon_register(Vd, src1); |
4443 } else { | 4502 } else { |
4444 UNIMPLEMENTED(); | 4503 UNIMPLEMENTED(); |
4445 } | 4504 } |
4446 break; | 4505 break; |
4447 } | 4506 } |
4448 case 0xe: { | 4507 case 0xe: { |
4449 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { | 4508 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { |
4450 // vceq.f32. | 4509 // vceq.f32. |
4451 float src1[4], src2[4]; | 4510 float src1[4], src2[4]; |
4452 get_q_register(Vn, src1); | 4511 get_neon_register(Vn, src1); |
4453 get_q_register(Vm, src2); | 4512 get_neon_register(Vm, src2); |
4454 uint32_t dst[4]; | 4513 uint32_t dst[4]; |
4455 for (int i = 0; i < 4; i++) { | 4514 for (int i = 0; i < 4; i++) { |
4456 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; | 4515 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
4457 } | 4516 } |
4458 set_q_register(Vd, dst); | 4517 set_neon_register(Vd, dst); |
4459 } else { | 4518 } else { |
4460 UNIMPLEMENTED(); | 4519 UNIMPLEMENTED(); |
4461 } | 4520 } |
4462 break; | 4521 break; |
4463 } | 4522 } |
4464 case 0xf: { | 4523 case 0xf: { |
4465 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { | 4524 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { |
4466 float src1[4], src2[4]; | 4525 float src1[4], src2[4]; |
4467 get_q_register(Vn, src1); | 4526 get_neon_register(Vn, src1); |
4468 get_q_register(Vm, src2); | 4527 get_neon_register(Vm, src2); |
4469 if (instr->Bit(4) == 1) { | 4528 if (instr->Bit(4) == 1) { |
4470 if (instr->Bit(21) == 0) { | 4529 if (instr->Bit(21) == 0) { |
4471 // vrecps.f32 Qd, Qm, Qn. | 4530 // vrecps.f32 Qd, Qm, Qn. |
4472 for (int i = 0; i < 4; i++) { | 4531 for (int i = 0; i < 4; i++) { |
4473 src1[i] = 2.0f - src1[i] * src2[i]; | 4532 src1[i] = 2.0f - src1[i] * src2[i]; |
4474 } | 4533 } |
4475 } else { | 4534 } else { |
4476 // vrsqrts.f32 Qd, Qm, Qn. | 4535 // vrsqrts.f32 Qd, Qm, Qn. |
4477 for (int i = 0; i < 4; i++) { | 4536 for (int i = 0; i < 4; i++) { |
4478 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; | 4537 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; |
4479 } | 4538 } |
4480 } | 4539 } |
4481 } else { | 4540 } else { |
4482 // vmin/vmax.f32 Qd, Qm, Qn. | 4541 // vmin/vmax.f32 Qd, Qm, Qn. |
4483 bool min = instr->Bit(21) == 1; | 4542 bool min = instr->Bit(21) == 1; |
4484 for (int i = 0; i < 4; i++) { | 4543 for (int i = 0; i < 4; i++) { |
4485 src1[i] = MinMax(src1[i], src2[i], min); | 4544 src1[i] = MinMax(src1[i], src2[i], min); |
4486 } | 4545 } |
4487 } | 4546 } |
4488 set_q_register(Vd, src1); | 4547 set_neon_register(Vd, src1); |
4489 } else { | 4548 } else { |
4490 UNIMPLEMENTED(); | 4549 UNIMPLEMENTED(); |
4491 } | 4550 } |
4492 break; | 4551 break; |
4493 } | 4552 } |
4494 default: | 4553 default: |
4495 UNIMPLEMENTED(); | 4554 UNIMPLEMENTED(); |
4496 break; | 4555 break; |
4497 } | 4556 } |
4498 break; | 4557 break; |
(...skipping 20 matching lines...)
4519 UNIMPLEMENTED(); | 4578 UNIMPLEMENTED(); |
4520 break; | 4579 break; |
4521 } | 4580 } |
4522 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { | 4581 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { |
4523 // vext. | 4582 // vext. |
4524 int imm4 = instr->Bits(11, 8); | 4583 int imm4 = instr->Bits(11, 8); |
4525 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4584 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4526 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4585 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4527 int Vn = instr->VFPNRegValue(kSimd128Precision); | 4586 int Vn = instr->VFPNRegValue(kSimd128Precision); |
4528 uint8_t src1[16], src2[16], dst[16]; | 4587 uint8_t src1[16], src2[16], dst[16]; |
4529 get_q_register(Vn, src1); | 4588 get_neon_register(Vn, src1); |
4530 get_q_register(Vm, src2); | 4589 get_neon_register(Vm, src2); |
4531 int boundary = kSimd128Size - imm4; | 4590 int boundary = kSimd128Size - imm4; |
4532 int i = 0; | 4591 int i = 0; |
4533 for (; i < boundary; i++) { | 4592 for (; i < boundary; i++) { |
4534 dst[i] = src1[i + imm4]; | 4593 dst[i] = src1[i + imm4]; |
4535 } | 4594 } |
4536 for (; i < 16; i++) { | 4595 for (; i < 16; i++) { |
4537 dst[i] = src2[i - boundary]; | 4596 dst[i] = src2[i - boundary]; |
4538 } | 4597 } |
4539 set_q_register(Vd, dst); | 4598 set_neon_register(Vd, dst); |
4540 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { | 4599 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { |
4541 // vshl.i<size> Qd, Qm, shift | 4600 // vshl.i<size> Qd, Qm, shift |
4542 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 4601 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
4543 int shift = instr->Bits(21, 16) - size; | 4602 int shift = instr->Bits(21, 16) - size; |
4544 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4603 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4545 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4604 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4546 NeonSize ns = static_cast<NeonSize>(size / 16); | 4605 NeonSize ns = static_cast<NeonSize>(size / 16); |
4547 switch (ns) { | 4606 switch (ns) { |
4548 case Neon8: { | 4607 case Neon8: |
4549 uint8_t src[16]; | 4608 ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift); |
4550 get_q_register(Vm, src); | |
4551 for (int i = 0; i < 16; i++) { | |
4552 src[i] <<= shift; | |
4553 } | |
4554 set_q_register(Vd, src); | |
4555 break; | 4609 break; |
4556 } | 4610 case Neon16: |
4557 case Neon16: { | 4611 ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift); |
4558 uint16_t src[8]; | |
4559 get_q_register(Vm, src); | |
4560 for (int i = 0; i < 8; i++) { | |
4561 src[i] <<= shift; | |
4562 } | |
4563 set_q_register(Vd, src); | |
4564 break; | 4612 break; |
4565 } | 4613 case Neon32: |
4566 case Neon32: { | 4614 ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift); |
4567 uint32_t src[4]; | |
4568 get_q_register(Vm, src); | |
4569 for (int i = 0; i < 4; i++) { | |
4570 src[i] <<= shift; | |
4571 } | |
4572 set_q_register(Vd, src); | |
4573 break; | 4615 break; |
4574 } | |
4575 default: | 4616 default: |
4576 UNREACHABLE(); | 4617 UNREACHABLE(); |
4577 break; | 4618 break; |
4578 } | 4619 } |
4579 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { | 4620 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { |
4580 // vshr.s<size> Qd, Qm, shift | 4621 // vshr.s<size> Qd, Qm, shift |
4581 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 4622 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
4582 int shift = 2 * size - instr->Bits(21, 16); | 4623 int shift = 2 * size - instr->Bits(21, 16); |
4583 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4624 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4584 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4625 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4585 NeonSize ns = static_cast<NeonSize>(size / 16); | 4626 NeonSize ns = static_cast<NeonSize>(size / 16); |
4586 switch (ns) { | 4627 switch (ns) { |
4587 case Neon8: { | 4628 case Neon8: |
4588 int8_t src[16]; | 4629 ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift); |
4589 get_q_register(Vm, src); | |
4590 for (int i = 0; i < 16; i++) { | |
4591 src[i] = ArithmeticShiftRight(src[i], shift); | |
4592 } | |
4593 set_q_register(Vd, src); | |
4594 break; | 4630 break; |
4595 } | 4631 case Neon16: |
4596 case Neon16: { | 4632 ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift); |
4597 int16_t src[8]; | |
4598 get_q_register(Vm, src); | |
4599 for (int i = 0; i < 8; i++) { | |
4600 src[i] = ArithmeticShiftRight(src[i], shift); | |
4601 } | |
4602 set_q_register(Vd, src); | |
4603 break; | 4633 break; |
4604 } | 4634 case Neon32: |
4605 case Neon32: { | 4635 ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift); |
4606 int32_t src[4]; | |
4607 get_q_register(Vm, src); | |
4608 for (int i = 0; i < 4; i++) { | |
4609 src[i] = ArithmeticShiftRight(src[i], shift); | |
4610 } | |
4611 set_q_register(Vd, src); | |
4612 break; | 4636 break; |
4613 } | |
4614 default: | 4637 default: |
4615 UNREACHABLE(); | 4638 UNREACHABLE(); |
4616 break; | 4639 break; |
4617 } | 4640 } |
4618 } else { | 4641 } else { |
4619 UNIMPLEMENTED(); | 4642 UNIMPLEMENTED(); |
4620 } | 4643 } |
4621 break; | 4644 break; |
4622 case 6: { | 4645 case 6: { |
4623 int Vd, Vm, Vn; | 4646 int Vd, Vm, Vn; |
(...skipping 27 matching lines...)
4651 } | 4674 } |
4652 } else { | 4675 } else { |
4653 UNIMPLEMENTED(); | 4676 UNIMPLEMENTED(); |
4654 } | 4677 } |
4655 break; | 4678 break; |
4656 } | 4679 } |
4657 case 0x1: { | 4680 case 0x1: { |
4658 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { | 4681 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { |
4659 // vbsl.size Qd, Qm, Qn. | 4682 // vbsl.size Qd, Qm, Qn. |
4660 uint32_t dst[4], src1[4], src2[4]; | 4683 uint32_t dst[4], src1[4], src2[4]; |
4661 get_q_register(Vd, dst); | 4684 get_neon_register(Vd, dst); |
4662 get_q_register(Vn, src1); | 4685 get_neon_register(Vn, src1); |
4663 get_q_register(Vm, src2); | 4686 get_neon_register(Vm, src2); |
4664 for (int i = 0; i < 4; i++) { | 4687 for (int i = 0; i < 4; i++) { |
4665 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); | 4688 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
4666 } | 4689 } |
4667 set_q_register(Vd, dst); | 4690 set_neon_register(Vd, dst); |
4668 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { | 4691 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { |
4669 if (instr->Bit(6) == 0) { | 4692 if (instr->Bit(6) == 0) { |
4670 // veor Dd, Dn, Dm | 4693 // veor Dd, Dn, Dm |
4671 uint64_t src1, src2; | 4694 uint64_t src1, src2; |
4672 get_d_register(Vn, &src1); | 4695 get_d_register(Vn, &src1); |
4673 get_d_register(Vm, &src2); | 4696 get_d_register(Vm, &src2); |
4674 src1 ^= src2; | 4697 src1 ^= src2; |
4675 set_d_register(Vd, &src1); | 4698 set_d_register(Vd, &src1); |
4676 | 4699 |
4677 } else { | 4700 } else { |
4678 // veor Qd, Qn, Qm | 4701 // veor Qd, Qn, Qm |
4679 uint32_t src1[4], src2[4]; | 4702 uint32_t src1[4], src2[4]; |
4680 get_q_register(Vn, src1); | 4703 get_neon_register(Vn, src1); |
4681 get_q_register(Vm, src2); | 4704 get_neon_register(Vm, src2); |
4682 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; | 4705 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
4683 set_q_register(Vd, src1); | 4706 set_neon_register(Vd, src1); |
4684 } | 4707 } |
4685 } else { | 4708 } else { |
4686 UNIMPLEMENTED(); | 4709 UNIMPLEMENTED(); |
4687 } | 4710 } |
4688 break; | 4711 break; |
4689 } | 4712 } |
4690 case 0x2: { | 4713 case 0x2: { |
4691 if (instr->Bit(4) == 1) { | 4714 if (instr->Bit(4) == 1) { |
4692 // vqsub.u<size> Qd, Qm, Qn. | 4715 // vqsub.u<size> Qd, Qm, Qn. |
4693 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
(...skipping 14 matching lines...)
4708 } else { | 4731 } else { |
4709 UNIMPLEMENTED(); | 4732 UNIMPLEMENTED(); |
4710 } | 4733 } |
4711 break; | 4734 break; |
4712 } | 4735 } |
4713 case 0x3: { | 4736 case 0x3: { |
4714 // vcge/vcgt.u<size> Qd, Qm, Qn. | 4737 // vcge/vcgt.u<size> Qd, Qm, Qn. |
4715 bool ge = instr->Bit(4) == 1; | 4738 bool ge = instr->Bit(4) == 1; |
4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4739 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4717 switch (size) { | 4740 switch (size) { |
4718 case Neon8: { | 4741 case Neon8: |
4719 uint8_t src1[16], src2[16]; | 4742 CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
4720 get_q_register(Vn, src1); | |
4721 get_q_register(Vm, src2); | |
4722 for (int i = 0; i < 16; i++) { | |
4723 if (ge) | |
4724 src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; | |
4725 else | |
4726 src1[i] = src1[i] > src2[i] ? 0xFFu : 0; | |
4727 } | |
4728 set_q_register(Vd, src1); | |
4729 break; | 4743 break; |
4730 } | 4744 case Neon16: |
4731 case Neon16: { | 4745 CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
4732 uint16_t src1[8], src2[8]; | |
4733 get_q_register(Vn, src1); | |
4734 get_q_register(Vm, src2); | |
4735 for (int i = 0; i < 8; i++) { | |
4736 if (ge) | |
4737 src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; | |
4738 else | |
4739 src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; | |
4740 } | |
4741 set_q_register(Vd, src1); | |
4742 break; | 4746 break; |
4743 } | 4747 case Neon32: |
4744 case Neon32: { | 4748 CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
4745 uint32_t src1[4], src2[4]; | |
4746 get_q_register(Vn, src1); | |
4747 get_q_register(Vm, src2); | |
4748 for (int i = 0; i < 4; i++) { | |
4749 if (ge) | |
4750 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; | |
4751 else | |
4752 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; | |
4753 } | |
4754 set_q_register(Vd, src1); | |
4755 break; | 4749 break; |
4756 } | |
4757 default: | 4750 default: |
4758 UNREACHABLE(); | 4751 UNREACHABLE(); |
4759 break; | 4752 break; |
4760 } | 4753 } |
4761 break; | 4754 break; |
4762 } | 4755 } |
4763 case 0x6: { | 4756 case 0x6: { |
4764 // vmin/vmax.u<size> Qd, Qm, Qn. | 4757 // vmin/vmax.u<size> Qd, Qm, Qn. |
4765 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4758 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4766 bool min = instr->Bit(4) != 0; | 4759 bool min = instr->Bit(4) != 0; |
4767 switch (size) { | 4760 switch (size) { |
4768 case Neon8: { | 4761 case Neon8: |
4769 uint8_t src1[16], src2[16]; | 4762 MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
4770 get_q_register(Vn, src1); | |
4771 get_q_register(Vm, src2); | |
4772 for (int i = 0; i < 16; i++) { | |
4773 src1[i] = MinMax(src1[i], src2[i], min); | |
4774 } | |
4775 set_q_register(Vd, src1); | |
4776 break; | 4763 break; |
4777 } | 4764 case Neon16: |
4778 case Neon16: { | 4765 MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
4779 uint16_t src1[8], src2[8]; | |
4780 get_q_register(Vn, src1); | |
4781 get_q_register(Vm, src2); | |
4782 for (int i = 0; i < 8; i++) { | |
4783 src1[i] = MinMax(src1[i], src2[i], min); | |
4784 } | |
4785 set_q_register(Vd, src1); | |
4786 break; | 4766 break; |
4787 } | 4767 case Neon32: |
4788 case Neon32: { | 4768 MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
4789 uint32_t src1[4], src2[4]; | |
4790 get_q_register(Vn, src1); | |
4791 get_q_register(Vm, src2); | |
4792 for (int i = 0; i < 4; i++) { | |
4793 src1[i] = MinMax(src1[i], src2[i], min); | |
4794 } | |
4795 set_q_register(Vd, src1); | |
4796 break; | 4769 break; |
4797 } | |
4798 default: | 4770 default: |
4799 UNREACHABLE(); | 4771 UNREACHABLE(); |
4800 break; | 4772 break; |
4801 } | 4773 } |
4802 break; | 4774 break; |
4803 } | 4775 } |
4804 case 0x8: { | 4776 case 0x8: { |
4805 if (instr->Bit(4) == 0) { | 4777 if (instr->Bit(4) == 0) { |
4806 // vsub.size Qd, Qm, Qn. | 4778 // vsub.size Qd, Qm, Qn. |
4807 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4779 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4808 switch (size) { | 4780 switch (size) { |
4809 case Neon8: { | 4781 case Neon8: |
4810 uint8_t src1[16], src2[16]; | 4782 Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
4811 get_q_register(Vn, src1); | |
4812 get_q_register(Vm, src2); | |
4813 for (int i = 0; i < 16; i++) { | |
4814 src1[i] -= src2[i]; | |
4815 } | |
4816 set_q_register(Vd, src1); | |
4817 break; | 4783 break; |
4818 } | 4784 case Neon16: |
4819 case Neon16: { | 4785 Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
4820 uint16_t src1[8], src2[8]; | |
4821 get_q_register(Vn, src1); | |
4822 get_q_register(Vm, src2); | |
4823 for (int i = 0; i < 8; i++) { | |
4824 src1[i] -= src2[i]; | |
4825 } | |
4826 set_q_register(Vd, src1); | |
4827 break; | 4786 break; |
4828 } | 4787 case Neon32: |
4829 case Neon32: { | 4788 Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
4830 uint32_t src1[4], src2[4]; | |
4831 get_q_register(Vn, src1); | |
4832 get_q_register(Vm, src2); | |
4833 for (int i = 0; i < 4; i++) { | |
4834 src1[i] -= src2[i]; | |
4835 } | |
4836 set_q_register(Vd, src1); | |
4837 break; | 4789 break; |
4838 } | |
4839 default: | 4790 default: |
4840 UNREACHABLE(); | 4791 UNREACHABLE(); |
4841 break; | 4792 break; |
4842 } | 4793 } |
4843 } else { | 4794 } else { |
4844 // vceq.size Qd, Qm, Qn. | 4795 // vceq.size Qd, Qm, Qn. |
4845 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4796 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4846 switch (size) { | 4797 switch (size) { |
4847 case Neon8: { | 4798 case Neon8: |
4848 uint8_t src1[16], src2[16]; | 4799 CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
4849 get_q_register(Vn, src1); | |
4850 get_q_register(Vm, src2); | |
4851 for (int i = 0; i < 16; i++) { | |
4852 src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; | |
4853 } | |
4854 set_q_register(Vd, src1); | |
4855 break; | 4800 break; |
4856 } | 4801 case Neon16: |
4857 case Neon16: { | 4802 CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
4858 uint16_t src1[8], src2[8]; | |
4859 get_q_register(Vn, src1); | |
4860 get_q_register(Vm, src2); | |
4861 for (int i = 0; i < 8; i++) { | |
4862 src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; | |
4863 } | |
4864 set_q_register(Vd, src1); | |
4865 break; | 4803 break; |
4866 } | 4804 case Neon32: |
4867 case Neon32: { | 4805 CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
4868 uint32_t src1[4], src2[4]; | |
4869 get_q_register(Vn, src1); | |
4870 get_q_register(Vm, src2); | |
4871 for (int i = 0; i < 4; i++) { | |
4872 src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; | |
4873 } | |
4874 set_q_register(Vd, src1); | |
4875 break; | 4806 break; |
4876 } | |
4877 default: | 4807 default: |
4878 UNREACHABLE(); | 4808 UNREACHABLE(); |
4879 break; | 4809 break; |
4880 } | 4810 } |
4881 } | 4811 } |
4882 break; | 4812 break; |
4883 } | 4813 } |
4884 case 0xa: { | 4814 case 0xa: { |
4885 // vpmin/vpmax.u<size> Dd, Dm, Dn. | 4815 // vpmin/vpmax.u<size> Dd, Dm, Dn. |
4886 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4816 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4887 bool min = instr->Bit(4) != 0; | 4817 bool min = instr->Bit(4) != 0; |
4888 switch (size) { | 4818 switch (size) { |
4889 case Neon8: { | 4819 case Neon8: |
4890 uint8_t dst[8], src1[8], src2[8]; | 4820 PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min); |
4891 get_d_register(Vn, src1); | |
4892 get_d_register(Vm, src2); | |
4893 for (int i = 0; i < 4; i++) { | |
4894 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
4895 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
4896 } | |
4897 set_d_register(Vd, dst); | |
4898 break; | 4821 break; |
4899 } | 4822 case Neon16: |
4900 case Neon16: { | 4823 PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min); |
4901 uint16_t dst[4], src1[4], src2[4]; | |
4902 get_d_register(Vn, src1); | |
4903 get_d_register(Vm, src2); | |
4904 for (int i = 0; i < 2; i++) { | |
4905 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
4906 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
4907 } | |
4908 set_d_register(Vd, dst); | |
4909 break; | 4824 break; |
4910 } | 4825 case Neon32: |
4911 case Neon32: { | 4826 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min); |
4912 uint32_t dst[2], src1[2], src2[2]; | |
4913 get_d_register(Vn, src1); | |
4914 get_d_register(Vm, src2); | |
4915 dst[0] = MinMax(src1[0], src1[1], min); | |
4916 dst[1] = MinMax(src2[0], src2[1], min); | |
4917 set_d_register(Vd, dst); | |
4918 break; | 4827 break; |
4919 } | |
4920 default: | 4828 default: |
4921 UNREACHABLE(); | 4829 UNREACHABLE(); |
4922 break; | 4830 break; |
4923 } | 4831 } |
4924 break; | 4832 break; |
4925 } | 4833 } |
4926 case 0xd: { | 4834 case 0xd: { |
4927 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { | 4835 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
4928 // vmul.f32 Qd, Qn, Qm | 4836 // vmul.f32 Qd, Qn, Qm |
4929 float src1[4], src2[4]; | 4837 float src1[4], src2[4]; |
4930 get_q_register(Vn, src1); | 4838 get_neon_register(Vn, src1); |
4931 get_q_register(Vm, src2); | 4839 get_neon_register(Vm, src2); |
4932 for (int i = 0; i < 4; i++) { | 4840 for (int i = 0; i < 4; i++) { |
4933 src1[i] = src1[i] * src2[i]; | 4841 src1[i] = src1[i] * src2[i]; |
4934 } | 4842 } |
4935 set_q_register(Vd, src1); | 4843 set_neon_register(Vd, src1); |
4936 } else { | 4844 } else { |
4937 UNIMPLEMENTED(); | 4845 UNIMPLEMENTED(); |
4938 } | 4846 } |
4939 break; | 4847 break; |
4940 } | 4848 } |
4941 case 0xe: { | 4849 case 0xe: { |
4942 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { | 4850 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { |
4943 // vcge/vcgt.f32 Qd, Qm, Qn | 4851 // vcge/vcgt.f32 Qd, Qm, Qn |
4944 bool ge = instr->Bit(21) == 0; | 4852 bool ge = instr->Bit(21) == 0; |
4945 float src1[4], src2[4]; | 4853 float src1[4], src2[4]; |
4946 get_q_register(Vn, src1); | 4854 get_neon_register(Vn, src1); |
4947 get_q_register(Vm, src2); | 4855 get_neon_register(Vm, src2); |
4948 uint32_t dst[4]; | 4856 uint32_t dst[4]; |
4949 for (int i = 0; i < 4; i++) { | 4857 for (int i = 0; i < 4; i++) { |
4950 if (ge) { | 4858 if (ge) { |
4951 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; | 4859 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
4952 } else { | 4860 } else { |
4953 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; | 4861 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
4954 } | 4862 } |
4955 } | 4863 } |
4956 set_q_register(Vd, dst); | 4864 set_neon_register(Vd, dst); |
4957 } else { | 4865 } else { |
4958 UNIMPLEMENTED(); | 4866 UNIMPLEMENTED(); |
4959 } | 4867 } |
4960 break; | 4868 break; |
4961 } | 4869 } |
4962 default: | 4870 default: |
4963 UNREACHABLE(); | 4871 UNREACHABLE(); |
4964 break; | 4872 break; |
4965 } | 4873 } |
4966 break; | 4874 break; |
(...skipping 20 matching lines...)
4987 UNIMPLEMENTED(); | 4895 UNIMPLEMENTED(); |
4988 break; | 4896 break; |
4989 } | 4897 } |
4990 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { | 4898 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { |
4991 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && | 4899 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && |
4992 instr->Bit(6) == 1) { | 4900 instr->Bit(6) == 1) { |
4993 // vcvt.<Td>.<Tm> Qd, Qm. | 4901 // vcvt.<Td>.<Tm> Qd, Qm. |
4994 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4902 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4995 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4903 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4996 uint32_t q_data[4]; | 4904 uint32_t q_data[4]; |
4997 get_q_register(Vm, q_data); | 4905 get_neon_register(Vm, q_data); |
4998 int op = instr->Bits(8, 7); | 4906 int op = instr->Bits(8, 7); |
4999 for (int i = 0; i < 4; i++) { | 4907 for (int i = 0; i < 4; i++) { |
5000 switch (op) { | 4908 switch (op) { |
5001 case 0: | 4909 case 0: |
5002 // f32 <- s32, round towards nearest. | 4910 // f32 <- s32, round towards nearest. |
5003 q_data[i] = bit_cast<uint32_t>(std::round( | 4911 q_data[i] = bit_cast<uint32_t>(std::round( |
5004 static_cast<float>(bit_cast<int32_t>(q_data[i])))); | 4912 static_cast<float>(bit_cast<int32_t>(q_data[i])))); |
5005 break; | 4913 break; |
5006 case 1: | 4914 case 1: |
5007 // f32 <- u32, round towards nearest. | 4915 // f32 <- u32, round towards nearest. |
5008 q_data[i] = bit_cast<uint32_t>( | 4916 q_data[i] = bit_cast<uint32_t>( |
5009 std::round(static_cast<float>(q_data[i]))); | 4917 std::round(static_cast<float>(q_data[i]))); |
5010 break; | 4918 break; |
5011 case 2: | 4919 case 2: |
5012 // s32 <- f32, round to zero. | 4920 // s32 <- f32, round to zero. |
5013 q_data[i] = static_cast<uint32_t>( | 4921 q_data[i] = static_cast<uint32_t>( |
5014 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); | 4922 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); |
5015 break; | 4923 break; |
5016 case 3: | 4924 case 3: |
5017 // u32 <- f32, round to zero. | 4925 // u32 <- f32, round to zero. |
5018 q_data[i] = static_cast<uint32_t>( | 4926 q_data[i] = static_cast<uint32_t>( |
5019 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); | 4927 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); |
5020 break; | 4928 break; |
5021 } | 4929 } |
5022 } | 4930 } |
5023 set_q_register(Vd, q_data); | 4931 set_neon_register(Vd, q_data); |
5024 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { | 4932 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { |
5025 if (instr->Bit(6) == 0) { | 4933 if (instr->Bit(6) == 0) { |
5026 // vswp Dd, Dm. | 4934 // vswp Dd, Dm. |
5027 uint64_t dval, mval; | 4935 uint64_t dval, mval; |
5028 int vd = instr->VFPDRegValue(kDoublePrecision); | 4936 int vd = instr->VFPDRegValue(kDoublePrecision); |
5029 int vm = instr->VFPMRegValue(kDoublePrecision); | 4937 int vm = instr->VFPMRegValue(kDoublePrecision); |
5030 get_d_register(vd, &dval); | 4938 get_d_register(vd, &dval); |
5031 get_d_register(vm, &mval); | 4939 get_d_register(vm, &mval); |
5032 set_d_register(vm, &dval); | 4940 set_d_register(vm, &dval); |
5033 set_d_register(vd, &mval); | 4941 set_d_register(vd, &mval); |
5034 } else { | 4942 } else { |
5035 // vswp Qd, Qm. | 4943 // vswp Qd, Qm. |
5036 uint32_t dval[4], mval[4]; | 4944 uint32_t dval[4], mval[4]; |
5037 int vd = instr->VFPDRegValue(kSimd128Precision); | 4945 int vd = instr->VFPDRegValue(kSimd128Precision); |
5038 int vm = instr->VFPMRegValue(kSimd128Precision); | 4946 int vm = instr->VFPMRegValue(kSimd128Precision); |
5039 get_q_register(vd, dval); | 4947 get_neon_register(vd, dval); |
5040 get_q_register(vm, mval); | 4948 get_neon_register(vm, mval); |
5041 set_q_register(vm, dval); | 4949 set_neon_register(vm, dval); |
5042 set_q_register(vd, mval); | 4950 set_neon_register(vd, mval); |
5043 } | 4951 } |
5044 } else if (instr->Bits(11, 7) == 0x18) { | 4952 } else if (instr->Bits(11, 7) == 0x18) { |
5045 // vdup.32 Qd, Sm. | 4953 // vdup.32 Qd, Sm. |
5046 int vd = instr->VFPDRegValue(kSimd128Precision); | 4954 int vd = instr->VFPDRegValue(kSimd128Precision); |
5047 int vm = instr->VFPMRegValue(kDoublePrecision); | 4955 int vm = instr->VFPMRegValue(kDoublePrecision); |
5048 int index = instr->Bit(19); | 4956 int index = instr->Bit(19); |
5049 uint32_t s_data = get_s_register(vm * 2 + index); | 4957 uint32_t s_data = get_s_register(vm * 2 + index); |
5050 uint32_t q_data[4]; | 4958 uint32_t q_data[4]; |
5051 for (int i = 0; i < 4; i++) q_data[i] = s_data; | 4959 for (int i = 0; i < 4; i++) q_data[i] = s_data; |
5052 set_q_register(vd, q_data); | 4960 set_neon_register(vd, q_data); |
5053 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { | 4961 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { |
5054 // vmvn Qd, Qm. | 4962 // vmvn Qd, Qm. |
5055 int vd = instr->VFPDRegValue(kSimd128Precision); | 4963 int vd = instr->VFPDRegValue(kSimd128Precision); |
5056 int vm = instr->VFPMRegValue(kSimd128Precision); | 4964 int vm = instr->VFPMRegValue(kSimd128Precision); |
5057 uint32_t q_data[4]; | 4965 uint32_t q_data[4]; |
5058 get_q_register(vm, q_data); | 4966 get_neon_register(vm, q_data); |
5059 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; | 4967 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
5060 set_q_register(vd, q_data); | 4968 set_neon_register(vd, q_data); |
5061 } else if (instr->Bits(11, 10) == 0x2) { | 4969 } else if (instr->Bits(11, 10) == 0x2) { |
5062 // vtb[l,x] Dd, <list>, Dm. | 4970 // vtb[l,x] Dd, <list>, Dm. |
5063 int vd = instr->VFPDRegValue(kDoublePrecision); | 4971 int vd = instr->VFPDRegValue(kDoublePrecision); |
5064 int vn = instr->VFPNRegValue(kDoublePrecision); | 4972 int vn = instr->VFPNRegValue(kDoublePrecision); |
5065 int vm = instr->VFPMRegValue(kDoublePrecision); | 4973 int vm = instr->VFPMRegValue(kDoublePrecision); |
5066 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; | 4974 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
5067 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx | 4975 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
5068 uint64_t destination = 0, indices = 0, result = 0; | 4976 uint64_t destination = 0, indices = 0, result = 0; |
5069 get_d_register(vd, &destination); | 4977 get_d_register(vd, &destination); |
5070 get_d_register(vm, &indices); | 4978 get_d_register(vm, &indices); |
5071 for (int i = 0; i < kDoubleSize; i++) { | 4979 for (int i = 0; i < kDoubleSize; i++) { |
5072 int shift = i * kBitsPerByte; | 4980 int shift = i * kBitsPerByte; |
5073 int index = (indices >> shift) & 0xFF; | 4981 int index = (indices >> shift) & 0xFF; |
5074 if (index < table_len) { | 4982 if (index < table_len) { |
5075 uint64_t table; | 4983 uint64_t table; |
5076 get_d_register(vn + index / kDoubleSize, &table); | 4984 get_d_register(vn + index / kDoubleSize, &table); |
5077 result |= | 4985 result |= |
5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) | 4986 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
5079 << shift; | 4987 << shift; |
5080 } else if (vtbx) { | 4988 } else if (vtbx) { |
5081 result |= destination & (0xFFull << shift); | 4989 result |= destination & (0xFFull << shift); |
5082 } | 4990 } |
5083 } | 4991 } |
5084 set_d_register(vd, &result); | 4992 set_d_register(vd, &result); |
5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && | 4993 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) { |
5086 instr->Bit(6) == 1) { | |
5087 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 4994 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5088 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4995 if (instr->Bit(6) == 0) { |
5089 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4996 int Vd = instr->VFPDRegValue(kDoublePrecision); |
5090 if (instr->Bit(7) == 1) { | 4997 int Vm = instr->VFPMRegValue(kDoublePrecision); |
5091 // vzip.<size> Qd, Qm. | 4998 if (instr->Bit(7) == 1) { |
5092 switch (size) { | 4999 // vzip.<size> Dd, Dm. |
5093 case Neon8: { | 5000 switch (size) { |
5094 uint8_t src1[16], src2[16], dst1[16], dst2[16]; | 5001 case Neon8: |
5095 get_q_register(Vd, src1); | 5002 Zip<uint8_t, kDoubleSize>(this, Vd, Vm); |
5096 get_q_register(Vm, src2); | 5003 break; |
5097 for (int i = 0; i < 8; i++) { | 5004 case Neon16: |
5098 dst1[i * 2] = src1[i]; | 5005 Zip<uint16_t, kDoubleSize>(this, Vd, Vm); |
5099 dst1[i * 2 + 1] = src2[i]; | 5006 break; |
5100 dst2[i * 2] = src1[i + 8]; | 5007 case Neon32: |
5101 dst2[i * 2 + 1] = src2[i + 8]; | 5008 UNIMPLEMENTED(); |
5102 } | 5009 break; |
5103 set_q_register(Vd, dst1); | 5010 default: |
5104 set_q_register(Vm, dst2); | 5011 UNREACHABLE(); |
5105 break; | 5012 break; |
5106 } | 5013 } |
5107 case Neon16: { | 5014 } else { |
5108 uint16_t src1[8], src2[8], dst1[8], dst2[8]; | 5015 // vuzp.<size> Dd, Dm. |
5109 get_q_register(Vd, src1); | 5016 switch (size) { |
5110 get_q_register(Vm, src2); | 5017 case Neon8: |
5111 for (int i = 0; i < 4; i++) { | 5018 Unzip<uint8_t, kDoubleSize>(this, Vd, Vm); |
5112 dst1[i * 2] = src1[i]; | 5019 break; |
5113 dst1[i * 2 + 1] = src2[i]; | 5020 case Neon16: |
5114 dst2[i * 2] = src1[i + 4]; | 5021 Unzip<uint16_t, kDoubleSize>(this, Vd, Vm); |
5115 dst2[i * 2 + 1] = src2[i + 4]; | 5022 break; |
5116 } | 5023 case Neon32: |
5117 set_q_register(Vd, dst1); | 5024 UNIMPLEMENTED(); |
5118 set_q_register(Vm, dst2); | 5025 break; |
5119 break; | 5026 default: |
| 5027 UNREACHABLE(); |
| 5028 break; |
5120 } | 5029 } |
5121 case Neon32: { | |
5122 uint32_t src1[4], src2[4], dst1[4], dst2[4]; | |
5123 get_q_register(Vd, src1); | |
5124 get_q_register(Vm, src2); | |
5125 for (int i = 0; i < 2; i++) { | |
5126 dst1[i * 2] = src1[i]; | |
5127 dst1[i * 2 + 1] = src2[i]; | |
5128 dst2[i * 2] = src1[i + 2]; | |
5129 dst2[i * 2 + 1] = src2[i + 2]; | |
5130 } | |
5131 set_q_register(Vd, dst1); | |
5132 set_q_register(Vm, dst2); | |
5133 break; | |
5134 } | |
5135 default: | |
5136 UNREACHABLE(); | |
5137 break; | |
5138 } | 5030 } |
5139 } else { | 5031 } else { |
5140 // vuzp.<size> Qd, Qm. | 5032 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5141 switch (size) { | 5033 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5142 case Neon8: { | 5034 if (instr->Bit(7) == 1) { |
5143 uint8_t src1[16], src2[16], dst1[16], dst2[16]; | 5035 // vzip.<size> Qd, Qm. |
5144 get_q_register(Vd, src1); | 5036 switch (size) { |
5145 get_q_register(Vm, src2); | 5037 case Neon8: |
5146 for (int i = 0; i < 8; i++) { | 5038 Zip<uint8_t, kSimd128Size>(this, Vd, Vm); |
5147 dst1[i] = src1[i * 2]; | 5039 break; |
5148 dst1[i + 8] = src2[i * 2]; | 5040 case Neon16: |
5149 dst2[i] = src1[i * 2 + 1]; | 5041 Zip<uint16_t, kSimd128Size>(this, Vd, Vm); |
5150 dst2[i + 8] = src2[i * 2 + 1]; | 5042 break; |
5151 } | 5043 case Neon32: |
5152 set_q_register(Vd, dst1); | 5044 Zip<uint32_t, kSimd128Size>(this, Vd, Vm); |
5153 set_q_register(Vm, dst2); | 5045 break; |
5154 break; | 5046 default: |
| 5047 UNREACHABLE(); |
| 5048 break; |
5155 } | 5049 } |
5156 case Neon16: { | 5050 } else { |
5157 uint16_t src1[8], src2[8], dst1[8], dst2[8]; | 5051 // vuzp.<size> Qd, Qm. |
5158 get_q_register(Vd, src1); | 5052 switch (size) { |
5159 get_q_register(Vm, src2); | 5053 case Neon8: |
5160 for (int i = 0; i < 4; i++) { | 5054 Unzip<uint8_t, kSimd128Size>(this, Vd, Vm); |
5161 dst1[i] = src1[i * 2]; | 5055 break; |
5162 dst1[i + 4] = src2[i * 2]; | 5056 case Neon16: |
5163 dst2[i] = src1[i * 2 + 1]; | 5057 Unzip<uint16_t, kSimd128Size>(this, Vd, Vm); |
5164 dst2[i + 4] = src2[i * 2 + 1]; | 5058 break; |
5165 } | 5059 case Neon32: |
5166 set_q_register(Vd, dst1); | 5060 Unzip<uint32_t, kSimd128Size>(this, Vd, Vm); |
5167 set_q_register(Vm, dst2); | 5061 break; |
5168 break; | 5062 default: |
| 5063 UNREACHABLE(); |
| 5064 break; |
5169 } | 5065 } |
5170 case Neon32: { | |
5171 uint32_t src1[4], src2[4], dst1[4], dst2[4]; | |
5172 get_q_register(Vd, src1); | |
5173 get_q_register(Vm, src2); | |
5174 for (int i = 0; i < 2; i++) { | |
5175 dst1[i] = src1[i * 2]; | |
5176 dst1[i + 2] = src2[i * 2]; | |
5177 dst2[i] = src1[i * 2 + 1]; | |
5178 dst2[i + 2] = src2[i * 2 + 1]; | |
5179 } | |
5180 set_q_register(Vd, dst1); | |
5181 set_q_register(Vm, dst2); | |
5182 break; | |
5183 } | |
5184 default: | |
5185 UNREACHABLE(); | |
5186 break; | |
5187 } | 5066 } |
5188 } | 5067 } |
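The vzip/vuzp cases above now defer to Zip<T, SIZE> and Unzip<T, SIZE>, whose definitions are not visible in this hunk. Reconstructed from the fixed-width loops on the left, they plausibly look like the sketch below; the bodies, local names, and accessor visibility are inferred from the old code rather than taken from the patch:

  // Interleave the low halves of Vd and Vm into Vd and the high halves into
  // Vm (vzip), generalized over lane type T and register width SIZE.
  template <typename T, int SIZE>
  void Zip(Simulator* simulator, int Vd, int Vm) {
    static const int kElems = SIZE / sizeof(T);
    static const int kPairs = kElems / 2;
    T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
    simulator->get_neon_register<T, SIZE>(Vd, src1);
    simulator->get_neon_register<T, SIZE>(Vm, src2);
    for (int i = 0; i < kPairs; i++) {
      dst1[i * 2] = src1[i];
      dst1[i * 2 + 1] = src2[i];
      dst2[i * 2] = src1[i + kPairs];
      dst2[i * 2 + 1] = src2[i + kPairs];
    }
    simulator->set_neon_register<T, SIZE>(Vd, dst1);
    simulator->set_neon_register<T, SIZE>(Vm, dst2);
  }

  // Deinterleave: even lanes of the Vd:Vm pair go to Vd, odd lanes to Vm
  // (vuzp), matching the old per-size loops.
  template <typename T, int SIZE>
  void Unzip(Simulator* simulator, int Vd, int Vm) {
    static const int kElems = SIZE / sizeof(T);
    static const int kPairs = kElems / 2;
    T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
    simulator->get_neon_register<T, SIZE>(Vd, src1);
    simulator->get_neon_register<T, SIZE>(Vm, src2);
    for (int i = 0; i < kPairs; i++) {
      dst1[i] = src1[i * 2];
      dst1[i + kPairs] = src2[i * 2];
      dst2[i] = src1[i * 2 + 1];
      dst2[i + kPairs] = src2[i * 2 + 1];
    }
    simulator->set_neon_register<T, SIZE>(Vd, dst1);
    simulator->set_neon_register<T, SIZE>(Vm, dst2);
  }

Parameterizing on both the lane type and the register width (kDoubleSize or kSimd128Size) is what lets the D-register and Q-register branches in the new column share one body.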
5189 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { | 5068 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
5190 // vrev<op>.size Qd, Qm | 5069 // vrev<op>.size Qd, Qm |
5191 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5070 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5192 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5071 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5193 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5072 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5194 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - | 5073 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - |
5195 instr->Bits(8, 7)); | 5074 instr->Bits(8, 7)); |
5196 switch (op) { | 5075 switch (op) { |
5197 case Neon16: { | 5076 case Neon16: { |
5198 DCHECK_EQ(Neon8, size); | 5077 DCHECK_EQ(Neon8, size); |
5199 uint8_t src[16]; | 5078 uint8_t src[16]; |
5200 get_q_register(Vm, src); | 5079 get_neon_register(Vm, src); |
5201 for (int i = 0; i < 16; i += 2) { | 5080 for (int i = 0; i < 16; i += 2) { |
5202 std::swap(src[i], src[i + 1]); | 5081 std::swap(src[i], src[i + 1]); |
5203 } | 5082 } |
5204 set_q_register(Vd, src); | 5083 set_neon_register(Vd, src); |
5205 break; | 5084 break; |
5206 } | 5085 } |
5207 case Neon32: { | 5086 case Neon32: { |
5208 switch (size) { | 5087 switch (size) { |
5209 case Neon16: { | 5088 case Neon16: { |
5210 uint16_t src[8]; | 5089 uint16_t src[8]; |
5211 get_q_register(Vm, src); | 5090 get_neon_register(Vm, src); |
5212 for (int i = 0; i < 8; i += 2) { | 5091 for (int i = 0; i < 8; i += 2) { |
5213 std::swap(src[i], src[i + 1]); | 5092 std::swap(src[i], src[i + 1]); |
5214 } | 5093 } |
5215 set_q_register(Vd, src); | 5094 set_neon_register(Vd, src); |
5216 break; | 5095 break; |
5217 } | 5096 } |
5218 case Neon8: { | 5097 case Neon8: { |
5219 uint8_t src[16]; | 5098 uint8_t src[16]; |
5220 get_q_register(Vm, src); | 5099 get_neon_register(Vm, src); |
5221 for (int i = 0; i < 4; i++) { | 5100 for (int i = 0; i < 4; i++) { |
5222 std::swap(src[i * 4], src[i * 4 + 3]); | 5101 std::swap(src[i * 4], src[i * 4 + 3]); |
5223 std::swap(src[i * 4 + 1], src[i * 4 + 2]); | 5102 std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
5224 } | 5103 } |
5225 set_q_register(Vd, src); | 5104 set_neon_register(Vd, src); |
5226 break; | 5105 break; |
5227 } | 5106 } |
5228 default: | 5107 default: |
5229 UNREACHABLE(); | 5108 UNREACHABLE(); |
5230 break; | 5109 break; |
5231 } | 5110 } |
5232 break; | 5111 break; |
5233 } | 5112 } |
5234 case Neon64: { | 5113 case Neon64: { |
5235 switch (size) { | 5114 switch (size) { |
5236 case Neon32: { | 5115 case Neon32: { |
5237 uint32_t src[4]; | 5116 uint32_t src[4]; |
5238 get_q_register(Vm, src); | 5117 get_neon_register(Vm, src); |
5239 std::swap(src[0], src[1]); | 5118 std::swap(src[0], src[1]); |
5240 std::swap(src[2], src[3]); | 5119 std::swap(src[2], src[3]); |
5241 set_q_register(Vd, src); | 5120 set_neon_register(Vd, src); |
5242 break; | 5121 break; |
5243 } | 5122 } |
5244 case Neon16: { | 5123 case Neon16: { |
5245 uint16_t src[8]; | 5124 uint16_t src[8]; |
5246 get_q_register(Vm, src); | 5125 get_neon_register(Vm, src); |
5247 for (int i = 0; i < 2; i++) { | 5126 for (int i = 0; i < 2; i++) { |
5248 std::swap(src[i * 4], src[i * 4 + 3]); | 5127 std::swap(src[i * 4], src[i * 4 + 3]); |
5249 std::swap(src[i * 4 + 1], src[i * 4 + 2]); | 5128 std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
5250 } | 5129 } |
5251 set_q_register(Vd, src); | 5130 set_neon_register(Vd, src); |
5252 break; | 5131 break; |
5253 } | 5132 } |
5254 case Neon8: { | 5133 case Neon8: { |
5255 uint8_t src[16]; | 5134 uint8_t src[16]; |
5256 get_q_register(Vm, src); | 5135 get_neon_register(Vm, src); |
5257 for (int i = 0; i < 4; i++) { | 5136 for (int i = 0; i < 4; i++) { |
5258 std::swap(src[i], src[7 - i]); | 5137 std::swap(src[i], src[7 - i]); |
5259 std::swap(src[i + 8], src[15 - i]); | 5138 std::swap(src[i + 8], src[15 - i]); |
5260 } | 5139 } |
5261 set_q_register(Vd, src); | 5140 set_neon_register(Vd, src); |
5262 break; | 5141 break; |
5263 } | 5142 } |
5264 default: | 5143 default: |
5265 UNREACHABLE(); | 5144 UNREACHABLE(); |
5266 break; | 5145 break; |
5267 } | 5146 } |
5268 break; | 5147 break; |
5269 } | 5148 } |
5270 default: | 5149 default: |
5271 UNREACHABLE(); | 5150 UNREACHABLE(); |
5272 break; | 5151 break; |
5273 } | 5152 } |
5274 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { | 5153 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) { |
5275 int Vd = instr->VFPDRegValue(kSimd128Precision); | |
5276 int Vm = instr->VFPMRegValue(kSimd128Precision); | |
5277 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5154 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5278 // vtrn.<size> Qd, Qm. | 5155 if (instr->Bit(6) == 0) { |
5279 switch (size) { | 5156 int Vd = instr->VFPDRegValue(kDoublePrecision); |
5280 case Neon8: { | 5157 int Vm = instr->VFPMRegValue(kDoublePrecision); |
5281 uint8_t src[16], dst[16]; | 5158 // vtrn.<size> Dd, Dm. |
5282 get_q_register(Vd, dst); | 5159 switch (size) { |
5283 get_q_register(Vm, src); | 5160 case Neon8: |
5284 for (int i = 0; i < 8; i++) { | 5161 Transpose<uint8_t, kDoubleSize>(this, Vd, Vm); |
5285 std::swap(dst[2 * i + 1], src[2 * i]); | 5162 break; |
5286 } | 5163 case Neon16: |
5287 set_q_register(Vd, dst); | 5164 Transpose<uint16_t, kDoubleSize>(this, Vd, Vm); |
5288 set_q_register(Vm, src); | 5165 break; |
5289 break; | 5166 case Neon32: |
| 5167 Transpose<uint32_t, kDoubleSize>(this, Vd, Vm); |
| 5168 break; |
| 5169 default: |
| 5170 UNREACHABLE(); |
| 5171 break; |
5290 } | 5172 } |
5291 case Neon16: { | 5173 } else { |
5292 uint16_t src[8], dst[8]; | 5174 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5293 get_q_register(Vd, dst); | 5175 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5294 get_q_register(Vm, src); | 5176 // vtrn.<size> Qd, Qm. |
5295 for (int i = 0; i < 4; i++) { | 5177 switch (size) { |
5296 std::swap(dst[2 * i + 1], src[2 * i]); | 5178 case Neon8: |
5297 } | 5179 Transpose<uint8_t, kSimd128Size>(this, Vd, Vm); |
5298 set_q_register(Vd, dst); | 5180 break; |
5299 set_q_register(Vm, src); | 5181 case Neon16: |
5300 break; | 5182 Transpose<uint16_t, kSimd128Size>(this, Vd, Vm); |
| 5183 break; |
| 5184 case Neon32: |
| 5185 Transpose<uint32_t, kSimd128Size>(this, Vd, Vm); |
| 5186 break; |
| 5187 default: |
| 5188 UNREACHABLE(); |
| 5189 break; |
5301 } | 5190 } |
5302 case Neon32: { | |
5303 uint32_t src[4], dst[4]; | |
5304 get_q_register(Vd, dst); | |
5305 get_q_register(Vm, src); | |
5306 for (int i = 0; i < 2; i++) { | |
5307 std::swap(dst[2 * i + 1], src[2 * i]); | |
5308 } | |
5309 set_q_register(Vd, dst); | |
5310 set_q_register(Vm, src); | |
5311 break; | |
5312 } | |
5313 default: | |
5314 UNREACHABLE(); | |
5315 break; | |
5316 } | 5191 } |
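The vtrn handling in the new column likewise routes through Transpose<T, SIZE>, defined outside this hunk. A hedged reconstruction from the old loops (the body is an inference, not the patch itself):

  // vtrn: swap the odd lanes of Vd with the even lanes of Vm, leaving the
  // other lanes of each register in place.
  template <typename T, int SIZE>
  void Transpose(Simulator* simulator, int Vd, int Vm) {
    static const int kElems = SIZE / sizeof(T);
    static const int kPairs = kElems / 2;
    T src[kElems], dst[kElems];
    simulator->get_neon_register<T, SIZE>(Vd, dst);
    simulator->get_neon_register<T, SIZE>(Vm, src);
    for (int i = 0; i < kPairs; i++) {
      std::swap(dst[2 * i + 1], src[2 * i]);
    }
    simulator->set_neon_register<T, SIZE>(Vd, dst);
    simulator->set_neon_register<T, SIZE>(Vm, src);
  }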
5317 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { | 5192 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
5318 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5193 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5319 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5194 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5320 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5195 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5321 if (instr->Bits(9, 6) == 0xd) { | 5196 if (instr->Bits(9, 6) == 0xd) { |
5322 // vabs<type>.<size> Qd, Qm | 5197 // vabs<type>.<size> Qd, Qm |
5323 if (instr->Bit(10) != 0) { | 5198 if (instr->Bit(10) != 0) { |
5324 // floating point (clear sign bits) | 5199 // floating point (clear sign bits) |
5325 uint32_t src[4]; | 5200 uint32_t src[4]; |
5326 get_q_register(Vm, src); | 5201 get_neon_register(Vm, src); |
5327 for (int i = 0; i < 4; i++) { | 5202 for (int i = 0; i < 4; i++) { |
5328 src[i] &= ~0x80000000; | 5203 src[i] &= ~0x80000000; |
5329 } | 5204 } |
5330 set_q_register(Vd, src); | 5205 set_neon_register(Vd, src); |
5331 } else { | 5206 } else { |
5332 // signed integer | 5207 // signed integer |
5333 switch (size) { | 5208 switch (size) { |
5334 case Neon8: { | 5209 case Neon8: |
5335 int8_t src[16]; | 5210 Abs<int8_t, kSimd128Size>(this, Vd, Vm); |
5336 get_q_register(Vm, src); | |
5337 for (int i = 0; i < 16; i++) { | |
5338 src[i] = std::abs(src[i]); | |
5339 } | |
5340 set_q_register(Vd, src); | |
5341 break; | 5211 break; |
5342 } | 5212 case Neon16: |
5343 case Neon16: { | 5213 Abs<int16_t, kSimd128Size>(this, Vd, Vm); |
5344 int16_t src[8]; | |
5345 get_q_register(Vm, src); | |
5346 for (int i = 0; i < 8; i++) { | |
5347 src[i] = std::abs(src[i]); | |
5348 } | |
5349 set_q_register(Vd, src); | |
5350 break; | 5214 break; |
5351 } | 5215 case Neon32: |
5352 case Neon32: { | 5216 Abs<int32_t, kSimd128Size>(this, Vd, Vm); |
5353 int32_t src[4]; | |
5354 get_q_register(Vm, src); | |
5355 for (int i = 0; i < 4; i++) { | |
5356 src[i] = std::abs(src[i]); | |
5357 } | |
5358 set_q_register(Vd, src); | |
5359 break; | 5217 break; |
5360 } | |
5361 default: | 5218 default: |
5362 UNIMPLEMENTED(); | 5219 UNIMPLEMENTED(); |
5363 break; | 5220 break; |
5364 } | 5221 } |
5365 } | 5222 } |
5366 } else if (instr->Bits(9, 6) == 0xf) { | 5223 } else if (instr->Bits(9, 6) == 0xf) { |
5367 // vneg<type>.<size> Qd, Qm (signed integer) | 5224 // vneg<type>.<size> Qd, Qm (signed integer) |
5368 if (instr->Bit(10) != 0) { | 5225 if (instr->Bit(10) != 0) { |
5369 // floating point (toggle sign bits) | 5226 // floating point (toggle sign bits) |
5370 uint32_t src[4]; | 5227 uint32_t src[4]; |
5371 get_q_register(Vm, src); | 5228 get_neon_register(Vm, src); |
5372 for (int i = 0; i < 4; i++) { | 5229 for (int i = 0; i < 4; i++) { |
5373 src[i] ^= 0x80000000; | 5230 src[i] ^= 0x80000000; |
5374 } | 5231 } |
5375 set_q_register(Vd, src); | 5232 set_neon_register(Vd, src); |
5376 } else { | 5233 } else { |
5377 // signed integer | 5234 // signed integer |
5378 switch (size) { | 5235 switch (size) { |
5379 case Neon8: { | 5236 case Neon8: |
5380 int8_t src[16]; | 5237 Neg<int8_t, kSimd128Size>(this, Vd, Vm); |
5381 get_q_register(Vm, src); | |
5382 for (int i = 0; i < 16; i++) { | |
5383 src[i] = -src[i]; | |
5384 } | |
5385 set_q_register(Vd, src); | |
5386 break; | 5238 break; |
5387 } | |
5388 case Neon16: | 5239 case Neon16: |
5389 int16_t src[8]; | 5240 Neg<int16_t, kSimd128Size>(this, Vd, Vm); |
5390 get_q_register(Vm, src); | |
5391 for (int i = 0; i < 8; i++) { | |
5392 src[i] = -src[i]; | |
5393 } | |
5394 set_q_register(Vd, src); | |
5395 break; | 5241 break; |
5396 case Neon32: { | 5242 case Neon32: |
5397 int32_t src[4]; | 5243 Neg<int32_t, kSimd128Size>(this, Vd, Vm); |
5398 get_q_register(Vm, src); | |
5399 for (int i = 0; i < 4; i++) { | |
5400 src[i] = -src[i]; | |
5401 } | |
5402 set_q_register(Vd, src); | |
5403 break; | 5244 break; |
5404 } | |
5405 default: | 5245 default: |
5406 UNIMPLEMENTED(); | 5246 UNIMPLEMENTED(); |
5407 break; | 5247 break; |
5408 } | 5248 } |
5409 } | 5249 } |
5410 } else { | 5250 } else { |
5411 UNIMPLEMENTED(); | 5251 UNIMPLEMENTED(); |
5412 } | 5252 } |
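For vabs/vneg the floating-point paths stay inline (they just clear or toggle the sign bits), while the integer paths now call Abs<T, SIZE> and Neg<T, SIZE>. A plausible shape for those helpers, reconstructed from the old per-size loops and assuming the same lane-wise read/modify/write pattern (bodies and names are guesses, not the patch):

  // Lane-wise absolute value: read Vm, take std::abs of each signed lane,
  // write the result to Vd.
  template <typename T, int SIZE>
  void Abs(Simulator* simulator, int Vd, int Vm) {
    static const int kElems = SIZE / sizeof(T);
    T src[kElems];
    simulator->get_neon_register<T, SIZE>(Vm, src);
    for (int i = 0; i < kElems; i++) {
      src[i] = std::abs(src[i]);
    }
    simulator->set_neon_register<T, SIZE>(Vd, src);
  }

  // Lane-wise negation of signed integer lanes.
  template <typename T, int SIZE>
  void Neg(Simulator* simulator, int Vd, int Vm) {
    static const int kElems = SIZE / sizeof(T);
    T src[kElems];
    simulator->get_neon_register<T, SIZE>(Vm, src);
    for (int i = 0; i < kElems; i++) {
      src[i] = -src[i];
    }
    simulator->set_neon_register<T, SIZE>(Vd, src);
  }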
5413 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { | 5253 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { |
5414 // vrecpe/vrsqrte.f32 Qd, Qm. | 5254 // vrecpe/vrsqrte.f32 Qd, Qm. |
5415 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5255 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5416 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5256 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5417 uint32_t src[4]; | 5257 uint32_t src[4]; |
5418 get_q_register(Vm, src); | 5258 get_neon_register(Vm, src); |
5419 if (instr->Bit(7) == 0) { | 5259 if (instr->Bit(7) == 0) { |
5420 for (int i = 0; i < 4; i++) { | 5260 for (int i = 0; i < 4; i++) { |
5421 float denom = bit_cast<float>(src[i]); | 5261 float denom = bit_cast<float>(src[i]); |
5422 div_zero_vfp_flag_ = (denom == 0); | 5262 div_zero_vfp_flag_ = (denom == 0); |
5423 float result = 1.0f / denom; | 5263 float result = 1.0f / denom; |
5424 result = canonicalizeNaN(result); | 5264 result = canonicalizeNaN(result); |
5425 src[i] = bit_cast<uint32_t>(result); | 5265 src[i] = bit_cast<uint32_t>(result); |
5426 } | 5266 } |
5427 } else { | 5267 } else { |
5428 lazily_initialize_fast_sqrt(isolate_); | 5268 lazily_initialize_fast_sqrt(isolate_); |
5429 for (int i = 0; i < 4; i++) { | 5269 for (int i = 0; i < 4; i++) { |
5430 float radicand = bit_cast<float>(src[i]); | 5270 float radicand = bit_cast<float>(src[i]); |
5431 float result = 1.0f / fast_sqrt(radicand, isolate_); | 5271 float result = 1.0f / fast_sqrt(radicand, isolate_); |
5432 result = canonicalizeNaN(result); | 5272 result = canonicalizeNaN(result); |
5433 src[i] = bit_cast<uint32_t>(result); | 5273 src[i] = bit_cast<uint32_t>(result); |
5434 } | 5274 } |
5435 } | 5275 } |
5436 set_q_register(Vd, src); | 5276 set_neon_register(Vd, src); |
5437 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && | 5277 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && |
5438 instr->Bits(7, 6) != 0) { | 5278 instr->Bits(7, 6) != 0) { |
5439 // vqmovn.<type><size> Dd, Qm. | 5279 // vqmovn.<type><size> Dd, Qm. |
5440 int Vd = instr->VFPDRegValue(kDoublePrecision); | 5280 int Vd = instr->VFPDRegValue(kDoublePrecision); |
5441 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5281 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5442 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5282 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5443 bool is_unsigned = instr->Bit(6) != 0; | 5283 bool is_unsigned = instr->Bit(6) != 0; |
5444 switch (size) { | 5284 switch (size) { |
5445 case Neon8: { | 5285 case Neon8: { |
5446 if (is_unsigned) { | 5286 if (is_unsigned) { |
(...skipping 27 matching lines...) |
5474 UNIMPLEMENTED(); | 5314 UNIMPLEMENTED(); |
5475 } | 5315 } |
5476 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { | 5316 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { |
5477 // vshr.u<size> Qd, Qm, shift | 5317 // vshr.u<size> Qd, Qm, shift |
5478 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 5318 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
5479 int shift = 2 * size - instr->Bits(21, 16); | 5319 int shift = 2 * size - instr->Bits(21, 16); |
5480 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5320 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5481 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5321 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5482 NeonSize ns = static_cast<NeonSize>(size / 16); | 5322 NeonSize ns = static_cast<NeonSize>(size / 16); |
5483 switch (ns) { | 5323 switch (ns) { |
5484 case Neon8: { | 5324 case Neon8: |
5485 uint8_t src[16]; | 5325 ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift); |
5486 get_q_register(Vm, src); | |
5487 for (int i = 0; i < 16; i++) { | |
5488 src[i] >>= shift; | |
5489 } | |
5490 set_q_register(Vd, src); | |
5491 break; | 5326 break; |
5492 } | 5327 case Neon16: |
5493 case Neon16: { | 5328 ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift); |
5494 uint16_t src[8]; | |
5495 get_q_register(Vm, src); | |
5496 for (int i = 0; i < 8; i++) { | |
5497 src[i] >>= shift; | |
5498 } | |
5499 set_q_register(Vd, src); | |
5500 break; | 5329 break; |
5501 } | 5330 case Neon32: |
5502 case Neon32: { | 5331 ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift); |
5503 uint32_t src[4]; | |
5504 get_q_register(Vm, src); | |
5505 for (int i = 0; i < 4; i++) { | |
5506 src[i] >>= shift; | |
5507 } | |
5508 set_q_register(Vd, src); | |
5509 break; | 5332 break; |
5510 } | |
5511 default: | 5333 default: |
5512 UNREACHABLE(); | 5334 UNREACHABLE(); |
5513 break; | 5335 break; |
5514 } | 5336 } |
5515 } else { | 5337 } else { |
5516 UNIMPLEMENTED(); | 5338 UNIMPLEMENTED(); |
5517 } | 5339 } |
5518 break; | 5340 break; |
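The vshr.u<size> case now dispatches to ShiftRight<T, SIZE>(this, Vd, Vm, shift). Based on the loops it replaces, the helper presumably performs a logical right shift on every lane; a sketch under that assumption:

  // Lane-wise logical right shift by an immediate: read Vm, shift each
  // unsigned lane, write Vd.
  template <typename T, int SIZE>
  void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
    static const int kElems = SIZE / sizeof(T);
    T src[kElems];
    simulator->get_neon_register<T, SIZE>(Vm, src);
    for (int i = 0; i < kElems; i++) {
      src[i] >>= shift;
    }
    simulator->set_neon_register<T, SIZE>(Vd, src);
  }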
5519 case 8: | 5341 case 8: |
5520 if (instr->Bits(21, 20) == 0) { | 5342 if (instr->Bits(21, 20) == 0) { |
(...skipping 701 matching lines...) |
6222 processor->prev_ = nullptr; | 6044 processor->prev_ = nullptr; |
6223 processor->next_ = nullptr; | 6045 processor->next_ = nullptr; |
6224 } | 6046 } |
6225 | 6047 |
6226 } // namespace internal | 6048 } // namespace internal |
6227 } // namespace v8 | 6049 } // namespace v8 |
6228 | 6050 |
6229 #endif // USE_SIMULATOR | 6051 #endif // USE_SIMULATOR |
6230 | 6052 |
6231 #endif // V8_TARGET_ARCH_ARM | 6053 #endif // V8_TARGET_ARCH_ARM |