OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <stdarg.h> | 5 #include <stdarg.h> |
6 #include <stdlib.h> | 6 #include <stdlib.h> |
7 #include <cmath> | 7 #include <cmath> |
8 | 8 |
9 #if V8_TARGET_ARCH_ARM | 9 #if V8_TARGET_ARCH_ARM |
10 | 10 |
(...skipping 878 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); | 889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); |
890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); | 890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); |
891 } | 891 } |
892 | 892 |
893 | 893 |
894 void Simulator::set_d_register(int dreg, const uint32_t* value) { | 894 void Simulator::set_d_register(int dreg, const uint32_t* value) { |
895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); | 895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); |
896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); | 896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); |
897 } | 897 } |
898 | 898 |
899 template <typename T, int SIZE> | 899 template <typename T> |
900 void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) { | 900 void Simulator::get_d_register(int dreg, T* value) { |
901 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); | 901 DCHECK((dreg >= 0) && (dreg < num_d_registers)); |
902 DCHECK_LE(0, reg); | 902 memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize); |
903 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); | |
904 memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE); | |
905 } | 903 } |
906 | 904 |
907 template <typename T, int SIZE> | 905 template <typename T> |
908 void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) { | 906 void Simulator::set_d_register(int dreg, const T* value) { |
909 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); | 907 DCHECK((dreg >= 0) && (dreg < num_d_registers)); |
910 DCHECK_LE(0, reg); | 908 memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize); |
911 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); | 909 } |
912 memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE); | 910 |
| 911 template <typename T> |
| 912 void Simulator::get_q_register(int qreg, T* value) { |
| 913 DCHECK((qreg >= 0) && (qreg < num_q_registers)); |
| 914 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size); |
| 915 } |
| 916 |
| 917 template <typename T> |
| 918 void Simulator::set_q_register(int qreg, const T* value) { |
| 919 DCHECK((qreg >= 0) && (qreg < num_q_registers)); |
| 920 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size); |
913 } | 921 } |
914 | 922 |
915 // Raw access to the PC register. | 923 // Raw access to the PC register. |
916 void Simulator::set_pc(int32_t value) { | 924 void Simulator::set_pc(int32_t value) { |
917 pc_modified_ = true; | 925 pc_modified_ = true; |
918 registers_[pc] = value; | 926 registers_[pc] = value; |
919 } | 927 } |
920 | 928 |
921 | 929 |
922 bool Simulator::has_bad_pc() const { | 930 bool Simulator::has_bad_pc() const { |
(...skipping 2570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3493 case Neon32: { | 3501 case Neon32: { |
3494 for (int i = 0; i < 4; i++) { | 3502 for (int i = 0; i < 4; i++) { |
3495 q_data[i] = rt_value; | 3503 q_data[i] = rt_value; |
3496 } | 3504 } |
3497 break; | 3505 break; |
3498 } | 3506 } |
3499 default: | 3507 default: |
3500 UNREACHABLE(); | 3508 UNREACHABLE(); |
3501 break; | 3509 break; |
3502 } | 3510 } |
3503 set_neon_register(vd, q_data); | 3511 set_q_register(vd, q_data); |
3504 } | 3512 } |
3505 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { | 3513 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { |
3506 // vmov (scalar to ARM core register) | 3514 // vmov (scalar to ARM core register) |
3507 int vn = instr->VFPNRegValue(kDoublePrecision); | 3515 int vn = instr->VFPNRegValue(kDoublePrecision); |
3508 int rt = instr->RtValue(); | 3516 int rt = instr->RtValue(); |
3509 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); | 3517 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); |
3510 uint64_t data; | 3518 uint64_t data; |
3511 get_d_register(vn, &data); | 3519 get_d_register(vn, &data); |
3512 if ((opc1_opc2 & 0xb) == 0) { | 3520 if ((opc1_opc2 & 0xb) == 0) { |
3513 // NeonS32 / NeonU32 | 3521 // NeonS32 / NeonU32 |
(...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3980 break; | 3988 break; |
3981 default: | 3989 default: |
3982 UNIMPLEMENTED(); // Not used by V8. | 3990 UNIMPLEMENTED(); // Not used by V8. |
3983 } | 3991 } |
3984 } else { | 3992 } else { |
3985 UNIMPLEMENTED(); // Not used by V8. | 3993 UNIMPLEMENTED(); // Not used by V8. |
3986 } | 3994 } |
3987 } | 3995 } |
3988 | 3996 |
3989 // Templated operations for NEON instructions. | 3997 // Templated operations for NEON instructions. |
| 3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition. |
3990 template <typename T, typename U> | 3999 template <typename T, typename U> |
3991 U Widen(T value) { | 4000 U Widen(T value) { |
3992 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); | 4001 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
3993 static_assert(sizeof(U) > sizeof(T), "T must smaller than U"); | 4002 static_assert(sizeof(U) > sizeof(T), "T must smaller than U"); |
3994 return static_cast<U>(value); | 4003 return static_cast<U>(value); |
3995 } | 4004 } |
3996 | 4005 |
3997 template <typename T, typename U> | 4006 template <typename T, typename U> |
3998 U Narrow(T value) { | 4007 U Narrow(T value) { |
3999 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); | 4008 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); |
4000 static_assert(sizeof(U) < sizeof(T), "T must larger than U"); | 4009 static_assert(sizeof(U) < sizeof(T), "T must larger than U"); |
4001 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), | 4010 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), |
4002 "Signed-ness of T and U must match"); | 4011 "Signed-ness of T and U must match"); |
4003 // Make sure value can be expressed in the smaller type; otherwise, the | 4012 // Make sure value can be expressed in the smaller type; otherwise, the |
4004 // casted result is implementation defined. | 4013 // casted result is implementation defined. |
4005 DCHECK_LE(std::numeric_limits<T>::min(), value); | 4014 DCHECK_LE(std::numeric_limits<T>::min(), value); |
4006 DCHECK_GE(std::numeric_limits<T>::max(), value); | 4015 DCHECK_GE(std::numeric_limits<T>::max(), value); |
4007 return static_cast<U>(value); | 4016 return static_cast<U>(value); |
4008 } | 4017 } |
4009 | 4018 |
4010 template <typename T> | 4019 template <typename T> |
4011 T Clamp(int64_t value) { | 4020 T Clamp(int64_t value) { |
4012 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); | 4021 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
4013 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); | 4022 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); |
4014 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); | 4023 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); |
4015 int64_t clamped = std::max(min, std::min(max, value)); | 4024 int64_t clamped = std::max(min, std::min(max, value)); |
4016 return static_cast<T>(clamped); | 4025 return static_cast<T>(clamped); |
4017 } | 4026 } |
4018 | 4027 |
| 4028 template <typename T> |
| 4029 T MinMax(T a, T b, bool is_min) { |
| 4030 return is_min ? std::min(a, b) : std::max(a, b); |
| 4031 } |
| 4032 |
4019 template <typename T, typename U> | 4033 template <typename T, typename U> |
4020 void Widen(Simulator* simulator, int Vd, int Vm) { | 4034 void Widen(Simulator* simulator, int Vd, int Vm) { |
4021 static const int kLanes = 8 / sizeof(T); | 4035 static const int kLanes = 8 / sizeof(T); |
4022 T src[kLanes]; | 4036 T src[kLanes]; |
4023 U dst[kLanes]; | 4037 U dst[kLanes]; |
4024 simulator->get_neon_register<T, kDoubleSize>(Vm, src); | 4038 simulator->get_d_register(Vm, src); |
4025 for (int i = 0; i < kLanes; i++) { | 4039 for (int i = 0; i < kLanes; i++) { |
4026 dst[i] = Widen<T, U>(src[i]); | 4040 dst[i] = Widen<T, U>(src[i]); |
4027 } | 4041 } |
4028 simulator->set_neon_register(Vd, dst); | 4042 simulator->set_q_register(Vd, dst); |
4029 } | |
4030 | |
4031 template <typename T, int SIZE> | |
4032 void Abs(Simulator* simulator, int Vd, int Vm) { | |
4033 static const int kElems = SIZE / sizeof(T); | |
4034 T src[kElems]; | |
4035 simulator->get_neon_register<T, SIZE>(Vm, src); | |
4036 for (int i = 0; i < kElems; i++) { | |
4037 src[i] = std::abs(src[i]); | |
4038 } | |
4039 simulator->set_neon_register<T, SIZE>(Vd, src); | |
4040 } | |
4041 | |
4042 template <typename T, int SIZE> | |
4043 void Neg(Simulator* simulator, int Vd, int Vm) { | |
4044 static const int kElems = SIZE / sizeof(T); | |
4045 T src[kElems]; | |
4046 simulator->get_neon_register<T, SIZE>(Vm, src); | |
4047 for (int i = 0; i < kElems; i++) { | |
4048 src[i] = -src[i]; | |
4049 } | |
4050 simulator->set_neon_register<T, SIZE>(Vd, src); | |
4051 } | 4043 } |
4052 | 4044 |
4053 template <typename T, typename U> | 4045 template <typename T, typename U> |
4054 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { | 4046 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { |
4055 static const int kLanes = 16 / sizeof(T); | 4047 static const int kLanes = 16 / sizeof(T); |
4056 T src[kLanes]; | 4048 T src[kLanes]; |
4057 U dst[kLanes]; | 4049 U dst[kLanes]; |
4058 simulator->get_neon_register(Vm, src); | 4050 simulator->get_q_register(Vm, src); |
4059 for (int i = 0; i < kLanes; i++) { | 4051 for (int i = 0; i < kLanes; i++) { |
4060 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); | 4052 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); |
4061 } | 4053 } |
4062 simulator->set_neon_register<U, kDoubleSize>(Vd, dst); | 4054 simulator->set_d_register(Vd, dst); |
4063 } | 4055 } |
4064 | 4056 |
4065 template <typename T> | 4057 template <typename T> |
4066 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { | 4058 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
4067 static const int kLanes = 16 / sizeof(T); | 4059 static const int kLanes = 16 / sizeof(T); |
4068 T src1[kLanes], src2[kLanes]; | 4060 T src1[kLanes], src2[kLanes]; |
4069 simulator->get_neon_register(Vn, src1); | 4061 simulator->get_q_register(Vn, src1); |
4070 simulator->get_neon_register(Vm, src2); | 4062 simulator->get_q_register(Vm, src2); |
4071 for (int i = 0; i < kLanes; i++) { | 4063 for (int i = 0; i < kLanes; i++) { |
4072 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); | 4064 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); |
4073 } | 4065 } |
4074 simulator->set_neon_register(Vd, src1); | 4066 simulator->set_q_register(Vd, src1); |
4075 } | 4067 } |
4076 | 4068 |
4077 template <typename T> | 4069 template <typename T> |
4078 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { | 4070 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
4079 static const int kLanes = 16 / sizeof(T); | 4071 static const int kLanes = 16 / sizeof(T); |
4080 T src1[kLanes], src2[kLanes]; | 4072 T src1[kLanes], src2[kLanes]; |
4081 simulator->get_neon_register(Vn, src1); | 4073 simulator->get_q_register(Vn, src1); |
4082 simulator->get_neon_register(Vm, src2); | 4074 simulator->get_q_register(Vm, src2); |
4083 for (int i = 0; i < kLanes; i++) { | 4075 for (int i = 0; i < kLanes; i++) { |
4084 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); | 4076 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); |
4085 } | 4077 } |
4086 simulator->set_neon_register(Vd, src1); | 4078 simulator->set_q_register(Vd, src1); |
4087 } | |
4088 | |
4089 template <typename T, int SIZE> | |
4090 void Zip(Simulator* simulator, int Vd, int Vm) { | |
4091 static const int kElems = SIZE / sizeof(T); | |
4092 static const int kPairs = kElems / 2; | |
4093 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; | |
4094 simulator->get_neon_register<T, SIZE>(Vd, src1); | |
4095 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4096 for (int i = 0; i < kPairs; i++) { | |
4097 dst1[i * 2] = src1[i]; | |
4098 dst1[i * 2 + 1] = src2[i]; | |
4099 dst2[i * 2] = src1[i + kPairs]; | |
4100 dst2[i * 2 + 1] = src2[i + kPairs]; | |
4101 } | |
4102 simulator->set_neon_register<T, SIZE>(Vd, dst1); | |
4103 simulator->set_neon_register<T, SIZE>(Vm, dst2); | |
4104 } | |
4105 | |
4106 template <typename T, int SIZE> | |
4107 void Unzip(Simulator* simulator, int Vd, int Vm) { | |
4108 static const int kElems = SIZE / sizeof(T); | |
4109 static const int kPairs = kElems / 2; | |
4110 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; | |
4111 simulator->get_neon_register<T, SIZE>(Vd, src1); | |
4112 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4113 for (int i = 0; i < kPairs; i++) { | |
4114 dst1[i] = src1[i * 2]; | |
4115 dst1[i + kPairs] = src2[i * 2]; | |
4116 dst2[i] = src1[i * 2 + 1]; | |
4117 dst2[i + kPairs] = src2[i * 2 + 1]; | |
4118 } | |
4119 simulator->set_neon_register<T, SIZE>(Vd, dst1); | |
4120 simulator->set_neon_register<T, SIZE>(Vm, dst2); | |
4121 } | |
4122 | |
4123 template <typename T, int SIZE> | |
4124 void Transpose(Simulator* simulator, int Vd, int Vm) { | |
4125 static const int kElems = SIZE / sizeof(T); | |
4126 static const int kPairs = kElems / 2; | |
4127 T src1[kElems], src2[kElems]; | |
4128 simulator->get_neon_register<T, SIZE>(Vd, src1); | |
4129 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4130 for (int i = 0; i < kPairs; i++) { | |
4131 std::swap(src1[2 * i + 1], src2[2 * i]); | |
4132 } | |
4133 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4134 simulator->set_neon_register<T, SIZE>(Vm, src2); | |
4135 } | |
4136 | |
4137 template <typename T, int SIZE> | |
4138 void Test(Simulator* simulator, int Vd, int Vm, int Vn) { | |
4139 static const int kElems = SIZE / sizeof(T); | |
4140 T src1[kElems], src2[kElems]; | |
4141 simulator->get_neon_register<T, SIZE>(Vn, src1); | |
4142 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4143 for (int i = 0; i < kElems; i++) { | |
4144 src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0; | |
4145 } | |
4146 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4147 } | |
4148 | |
4149 template <typename T, int SIZE> | |
4150 void Add(Simulator* simulator, int Vd, int Vm, int Vn) { | |
4151 static const int kElems = SIZE / sizeof(T); | |
4152 T src1[kElems], src2[kElems]; | |
4153 simulator->get_neon_register<T, SIZE>(Vn, src1); | |
4154 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4155 for (int i = 0; i < kElems; i++) { | |
4156 src1[i] += src2[i]; | |
4157 } | |
4158 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4159 } | |
4160 | |
4161 template <typename T, int SIZE> | |
4162 void Sub(Simulator* simulator, int Vd, int Vm, int Vn) { | |
4163 static const int kElems = SIZE / sizeof(T); | |
4164 T src1[kElems], src2[kElems]; | |
4165 simulator->get_neon_register<T, SIZE>(Vn, src1); | |
4166 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4167 for (int i = 0; i < kElems; i++) { | |
4168 src1[i] -= src2[i]; | |
4169 } | |
4170 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4171 } | |
4172 | |
4173 template <typename T, int SIZE> | |
4174 void Mul(Simulator* simulator, int Vd, int Vm, int Vn) { | |
4175 static const int kElems = SIZE / sizeof(T); | |
4176 T src1[kElems], src2[kElems]; | |
4177 simulator->get_neon_register<T, SIZE>(Vn, src1); | |
4178 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4179 for (int i = 0; i < kElems; i++) { | |
4180 src1[i] *= src2[i]; | |
4181 } | |
4182 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4183 } | |
4184 | |
4185 template <typename T, int SIZE> | |
4186 void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) { | |
4187 static const int kElems = SIZE / sizeof(T); | |
4188 T src[kElems]; | |
4189 simulator->get_neon_register<T, SIZE>(Vm, src); | |
4190 for (int i = 0; i < kElems; i++) { | |
4191 src[i] <<= shift; | |
4192 } | |
4193 simulator->set_neon_register<T, SIZE>(Vd, src); | |
4194 } | |
4195 | |
4196 template <typename T, int SIZE> | |
4197 void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { | |
4198 static const int kElems = SIZE / sizeof(T); | |
4199 T src[kElems]; | |
4200 simulator->get_neon_register<T, SIZE>(Vm, src); | |
4201 for (int i = 0; i < kElems; i++) { | |
4202 src[i] >>= shift; | |
4203 } | |
4204 simulator->set_neon_register<T, SIZE>(Vd, src); | |
4205 } | |
4206 | |
4207 template <typename T, int SIZE> | |
4208 void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { | |
4209 static const int kElems = SIZE / sizeof(T); | |
4210 T src[kElems]; | |
4211 simulator->get_neon_register<T, SIZE>(Vm, src); | |
4212 for (int i = 0; i < kElems; i++) { | |
4213 src[i] = ArithmeticShiftRight(src[i], shift); | |
4214 } | |
4215 simulator->set_neon_register<T, SIZE>(Vd, src); | |
4216 } | |
4217 | |
4218 template <typename T, int SIZE> | |
4219 void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) { | |
4220 static const int kElems = SIZE / sizeof(T); | |
4221 T src1[kElems], src2[kElems]; | |
4222 simulator->get_neon_register<T, SIZE>(Vn, src1); | |
4223 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4224 for (int i = 0; i < kElems; i++) { | |
4225 src1[i] = src1[i] == src2[i] ? -1 : 0; | |
4226 } | |
4227 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4228 } | |
4229 | |
4230 template <typename T, int SIZE> | |
4231 void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) { | |
4232 static const int kElems = SIZE / sizeof(T); | |
4233 T src1[kElems], src2[kElems]; | |
4234 simulator->get_neon_register<T, SIZE>(Vn, src1); | |
4235 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4236 for (int i = 0; i < kElems; i++) { | |
4237 if (ge) | |
4238 src1[i] = src1[i] >= src2[i] ? -1 : 0; | |
4239 else | |
4240 src1[i] = src1[i] > src2[i] ? -1 : 0; | |
4241 } | |
4242 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4243 } | |
4244 | |
4245 template <typename T> | |
4246 T MinMax(T a, T b, bool is_min) { | |
4247 return is_min ? std::min(a, b) : std::max(a, b); | |
4248 } | |
4249 | |
4250 template <typename T, int SIZE> | |
4251 void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { | |
4252 static const int kElems = SIZE / sizeof(T); | |
4253 T src1[kElems], src2[kElems]; | |
4254 simulator->get_neon_register<T, SIZE>(Vn, src1); | |
4255 simulator->get_neon_register<T, SIZE>(Vm, src2); | |
4256 for (int i = 0; i < kElems; i++) { | |
4257 src1[i] = MinMax(src1[i], src2[i], min); | |
4258 } | |
4259 simulator->set_neon_register<T, SIZE>(Vd, src1); | |
4260 } | |
4261 | |
4262 template <typename T> | |
4263 void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { | |
4264 static const int kElems = kDoubleSize / sizeof(T); | |
4265 static const int kPairs = kElems / 2; | |
4266 T dst[kElems], src1[kElems], src2[kElems]; | |
4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1); | |
4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2); | |
4269 for (int i = 0; i < kPairs; i++) { | |
4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min); | |
4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min); | |
4272 } | |
4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst); | |
4274 } | 4079 } |
4275 | 4080 |
4276 void Simulator::DecodeSpecialCondition(Instruction* instr) { | 4081 void Simulator::DecodeSpecialCondition(Instruction* instr) { |
4277 switch (instr->SpecialValue()) { | 4082 switch (instr->SpecialValue()) { |
4278 case 4: { | 4083 case 4: { |
4279 int Vd, Vm, Vn; | 4084 int Vd, Vm, Vn; |
4280 if (instr->Bit(6) == 0) { | 4085 if (instr->Bit(6) == 0) { |
4281 Vd = instr->VFPDRegValue(kDoublePrecision); | 4086 Vd = instr->VFPDRegValue(kDoublePrecision); |
4282 Vm = instr->VFPMRegValue(kDoublePrecision); | 4087 Vm = instr->VFPMRegValue(kDoublePrecision); |
4283 Vn = instr->VFPNRegValue(kDoublePrecision); | 4088 Vn = instr->VFPNRegValue(kDoublePrecision); |
(...skipping 25 matching lines...) Expand all Loading... |
4309 UNIMPLEMENTED(); | 4114 UNIMPLEMENTED(); |
4310 } | 4115 } |
4311 break; | 4116 break; |
4312 } | 4117 } |
4313 case 0x1: { | 4118 case 0x1: { |
4314 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && | 4119 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && |
4315 instr->Bit(4) == 1) { | 4120 instr->Bit(4) == 1) { |
4316 // vmov Qd, Qm. | 4121 // vmov Qd, Qm. |
4317 // vorr, Qd, Qm, Qn. | 4122 // vorr, Qd, Qm, Qn. |
4318 uint32_t src1[4]; | 4123 uint32_t src1[4]; |
4319 get_neon_register(Vm, src1); | 4124 get_q_register(Vm, src1); |
4320 if (Vm != Vn) { | 4125 if (Vm != Vn) { |
4321 uint32_t src2[4]; | 4126 uint32_t src2[4]; |
4322 get_neon_register(Vn, src2); | 4127 get_q_register(Vn, src2); |
4323 for (int i = 0; i < 4; i++) { | 4128 for (int i = 0; i < 4; i++) { |
4324 src1[i] = src1[i] | src2[i]; | 4129 src1[i] = src1[i] | src2[i]; |
4325 } | 4130 } |
4326 } | 4131 } |
4327 set_neon_register(Vd, src1); | 4132 set_q_register(Vd, src1); |
4328 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && | 4133 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && |
4329 instr->Bit(4) == 1) { | 4134 instr->Bit(4) == 1) { |
4330 // vand Qd, Qm, Qn. | 4135 // vand Qd, Qm, Qn. |
4331 uint32_t src1[4], src2[4]; | 4136 uint32_t src1[4], src2[4]; |
4332 get_neon_register(Vn, src1); | 4137 get_q_register(Vn, src1); |
4333 get_neon_register(Vm, src2); | 4138 get_q_register(Vm, src2); |
4334 for (int i = 0; i < 4; i++) { | 4139 for (int i = 0; i < 4; i++) { |
4335 src1[i] = src1[i] & src2[i]; | 4140 src1[i] = src1[i] & src2[i]; |
4336 } | 4141 } |
4337 set_neon_register(Vd, src1); | 4142 set_q_register(Vd, src1); |
4338 } else { | 4143 } else { |
4339 UNIMPLEMENTED(); | 4144 UNIMPLEMENTED(); |
4340 } | 4145 } |
4341 break; | 4146 break; |
4342 } | 4147 } |
4343 case 0x2: { | 4148 case 0x2: { |
4344 if (instr->Bit(4) == 1) { | 4149 if (instr->Bit(4) == 1) { |
4345 // vqsub.s<size> Qd, Qm, Qn. | 4150 // vqsub.s<size> Qd, Qm, Qn. |
4346 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4151 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4347 switch (size) { | 4152 switch (size) { |
(...skipping 13 matching lines...) Expand all Loading... |
4361 } else { | 4166 } else { |
4362 UNIMPLEMENTED(); | 4167 UNIMPLEMENTED(); |
4363 } | 4168 } |
4364 break; | 4169 break; |
4365 } | 4170 } |
4366 case 0x3: { | 4171 case 0x3: { |
4367 // vcge/vcgt.s<size> Qd, Qm, Qn. | 4172 // vcge/vcgt.s<size> Qd, Qm, Qn. |
4368 bool ge = instr->Bit(4) == 1; | 4173 bool ge = instr->Bit(4) == 1; |
4369 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4174 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4370 switch (size) { | 4175 switch (size) { |
4371 case Neon8: | 4176 case Neon8: { |
4372 CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); | 4177 int8_t src1[16], src2[16]; |
| 4178 get_q_register(Vn, src1); |
| 4179 get_q_register(Vm, src2); |
| 4180 for (int i = 0; i < 16; i++) { |
| 4181 if (ge) |
| 4182 src1[i] = src1[i] >= src2[i] ? 0xFF : 0; |
| 4183 else |
| 4184 src1[i] = src1[i] > src2[i] ? 0xFF : 0; |
| 4185 } |
| 4186 set_q_register(Vd, src1); |
4373 break; | 4187 break; |
4374 case Neon16: | 4188 } |
4375 CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); | 4189 case Neon16: { |
| 4190 int16_t src1[8], src2[8]; |
| 4191 get_q_register(Vn, src1); |
| 4192 get_q_register(Vm, src2); |
| 4193 for (int i = 0; i < 8; i++) { |
| 4194 if (ge) |
| 4195 src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; |
| 4196 else |
| 4197 src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; |
| 4198 } |
| 4199 set_q_register(Vd, src1); |
4376 break; | 4200 break; |
4377 case Neon32: | 4201 } |
4378 CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); | 4202 case Neon32: { |
| 4203 int32_t src1[4], src2[4]; |
| 4204 get_q_register(Vn, src1); |
| 4205 get_q_register(Vm, src2); |
| 4206 for (int i = 0; i < 4; i++) { |
| 4207 if (ge) |
| 4208 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; |
| 4209 else |
| 4210 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; |
| 4211 } |
| 4212 set_q_register(Vd, src1); |
4379 break; | 4213 break; |
| 4214 } |
4380 default: | 4215 default: |
4381 UNREACHABLE(); | 4216 UNREACHABLE(); |
4382 break; | 4217 break; |
4383 } | 4218 } |
4384 break; | 4219 break; |
4385 } | 4220 } |
4386 case 0x6: { | 4221 case 0x6: { |
4387 // vmin/vmax.s<size> Qd, Qm, Qn. | 4222 // vmin/vmax.s<size> Qd, Qm, Qn. |
4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4223 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4389 bool min = instr->Bit(4) != 0; | 4224 bool min = instr->Bit(4) != 0; |
4390 switch (size) { | 4225 switch (size) { |
4391 case Neon8: | 4226 case Neon8: { |
4392 MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min); | 4227 int8_t src1[16], src2[16]; |
| 4228 get_q_register(Vn, src1); |
| 4229 get_q_register(Vm, src2); |
| 4230 for (int i = 0; i < 16; i++) { |
| 4231 src1[i] = MinMax(src1[i], src2[i], min); |
| 4232 } |
| 4233 set_q_register(Vd, src1); |
4393 break; | 4234 break; |
4394 case Neon16: | 4235 } |
4395 MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min); | 4236 case Neon16: { |
| 4237 int16_t src1[8], src2[8]; |
| 4238 get_q_register(Vn, src1); |
| 4239 get_q_register(Vm, src2); |
| 4240 for (int i = 0; i < 8; i++) { |
| 4241 src1[i] = MinMax(src1[i], src2[i], min); |
| 4242 } |
| 4243 set_q_register(Vd, src1); |
4396 break; | 4244 break; |
4397 case Neon32: | 4245 } |
4398 MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min); | 4246 case Neon32: { |
| 4247 int32_t src1[4], src2[4]; |
| 4248 get_q_register(Vn, src1); |
| 4249 get_q_register(Vm, src2); |
| 4250 for (int i = 0; i < 4; i++) { |
| 4251 src1[i] = MinMax(src1[i], src2[i], min); |
| 4252 } |
| 4253 set_q_register(Vd, src1); |
4399 break; | 4254 break; |
| 4255 } |
4400 default: | 4256 default: |
4401 UNREACHABLE(); | 4257 UNREACHABLE(); |
4402 break; | 4258 break; |
4403 } | 4259 } |
4404 break; | 4260 break; |
4405 } | 4261 } |
4406 case 0x8: { | 4262 case 0x8: { |
4407 // vadd/vtst | 4263 // vadd/vtst |
4408 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4264 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4409 if (instr->Bit(4) == 0) { | 4265 if (instr->Bit(4) == 0) { |
4410 // vadd.i<size> Qd, Qm, Qn. | 4266 // vadd.i<size> Qd, Qm, Qn. |
4411 switch (size) { | 4267 switch (size) { |
4412 case Neon8: | 4268 case Neon8: { |
4413 Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); | 4269 uint8_t src1[16], src2[16]; |
| 4270 get_q_register(Vn, src1); |
| 4271 get_q_register(Vm, src2); |
| 4272 for (int i = 0; i < 16; i++) { |
| 4273 src1[i] += src2[i]; |
| 4274 } |
| 4275 set_q_register(Vd, src1); |
4414 break; | 4276 break; |
4415 case Neon16: | 4277 } |
4416 Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); | 4278 case Neon16: { |
| 4279 uint16_t src1[8], src2[8]; |
| 4280 get_q_register(Vn, src1); |
| 4281 get_q_register(Vm, src2); |
| 4282 for (int i = 0; i < 8; i++) { |
| 4283 src1[i] += src2[i]; |
| 4284 } |
| 4285 set_q_register(Vd, src1); |
4417 break; | 4286 break; |
4418 case Neon32: | 4287 } |
4419 Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); | 4288 case Neon32: { |
| 4289 uint32_t src1[4], src2[4]; |
| 4290 get_q_register(Vn, src1); |
| 4291 get_q_register(Vm, src2); |
| 4292 for (int i = 0; i < 4; i++) { |
| 4293 src1[i] += src2[i]; |
| 4294 } |
| 4295 set_q_register(Vd, src1); |
4420 break; | 4296 break; |
| 4297 } |
4421 default: | 4298 default: |
4422 UNREACHABLE(); | 4299 UNREACHABLE(); |
4423 break; | 4300 break; |
4424 } | 4301 } |
4425 } else { | 4302 } else { |
4426 // vtst.i<size> Qd, Qm, Qn. | 4303 // vtst.i<size> Qd, Qm, Qn. |
4427 switch (size) { | 4304 switch (size) { |
4428 case Neon8: | 4305 case Neon8: { |
4429 Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); | 4306 uint8_t src1[16], src2[16]; |
| 4307 get_q_register(Vn, src1); |
| 4308 get_q_register(Vm, src2); |
| 4309 for (int i = 0; i < 16; i++) { |
| 4310 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; |
| 4311 } |
| 4312 set_q_register(Vd, src1); |
4430 break; | 4313 break; |
4431 case Neon16: | 4314 } |
4432 Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); | 4315 case Neon16: { |
| 4316 uint16_t src1[8], src2[8]; |
| 4317 get_q_register(Vn, src1); |
| 4318 get_q_register(Vm, src2); |
| 4319 for (int i = 0; i < 8; i++) { |
| 4320 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; |
| 4321 } |
| 4322 set_q_register(Vd, src1); |
4433 break; | 4323 break; |
4434 case Neon32: | 4324 } |
4435 Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); | 4325 case Neon32: { |
| 4326 uint32_t src1[4], src2[4]; |
| 4327 get_q_register(Vn, src1); |
| 4328 get_q_register(Vm, src2); |
| 4329 for (int i = 0; i < 4; i++) { |
| 4330 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
| 4331 } |
| 4332 set_q_register(Vd, src1); |
4436 break; | 4333 break; |
| 4334 } |
4437 default: | 4335 default: |
4438 UNREACHABLE(); | 4336 UNREACHABLE(); |
4439 break; | 4337 break; |
4440 } | 4338 } |
4441 } | 4339 } |
4442 break; | 4340 break; |
4443 } | 4341 } |
4444 case 0x9: { | 4342 case 0x9: { |
4445 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { | 4343 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
4446 // vmul.i<size> Qd, Qm, Qn. | 4344 // vmul.i<size> Qd, Qm, Qn. |
4447 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4345 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4448 switch (size) { | 4346 switch (size) { |
4449 case Neon8: | 4347 case Neon8: { |
4450 Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); | 4348 uint8_t src1[16], src2[16]; |
| 4349 get_q_register(Vn, src1); |
| 4350 get_q_register(Vm, src2); |
| 4351 for (int i = 0; i < 16; i++) { |
| 4352 src1[i] *= src2[i]; |
| 4353 } |
| 4354 set_q_register(Vd, src1); |
4451 break; | 4355 break; |
4452 case Neon16: | 4356 } |
4453 Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); | 4357 case Neon16: { |
| 4358 uint16_t src1[8], src2[8]; |
| 4359 get_q_register(Vn, src1); |
| 4360 get_q_register(Vm, src2); |
| 4361 for (int i = 0; i < 8; i++) { |
| 4362 src1[i] *= src2[i]; |
| 4363 } |
| 4364 set_q_register(Vd, src1); |
4454 break; | 4365 break; |
4455 case Neon32: | 4366 } |
4456 Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); | 4367 case Neon32: { |
| 4368 uint32_t src1[4], src2[4]; |
| 4369 get_q_register(Vn, src1); |
| 4370 get_q_register(Vm, src2); |
| 4371 for (int i = 0; i < 4; i++) { |
| 4372 src1[i] *= src2[i]; |
| 4373 } |
| 4374 set_q_register(Vd, src1); |
4457 break; | 4375 break; |
| 4376 } |
4458 default: | 4377 default: |
4459 UNREACHABLE(); | 4378 UNREACHABLE(); |
4460 break; | 4379 break; |
4461 } | 4380 } |
4462 } else { | 4381 } else { |
4463 UNIMPLEMENTED(); | 4382 UNIMPLEMENTED(); |
4464 } | 4383 } |
4465 break; | 4384 break; |
4466 } | 4385 } |
4467 case 0xa: { | 4386 case 0xa: { |
4468 // vpmin/vpmax.s<size> Dd, Dm, Dn. | 4387 // vpmin/vpmax.s<size> Dd, Dm, Dn. |
4469 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4470 bool min = instr->Bit(4) != 0; | 4389 bool min = instr->Bit(4) != 0; |
4471 switch (size) { | 4390 switch (size) { |
4472 case Neon8: | 4391 case Neon8: { |
4473 PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min); | 4392 int8_t dst[8], src1[8], src2[8]; |
| 4393 get_d_register(Vn, src1); |
| 4394 get_d_register(Vm, src2); |
| 4395 for (int i = 0; i < 4; i++) { |
| 4396 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
| 4397 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
| 4398 } |
| 4399 set_d_register(Vd, dst); |
4474 break; | 4400 break; |
4475 case Neon16: | 4401 } |
4476 PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min); | 4402 case Neon16: { |
| 4403 int16_t dst[4], src1[4], src2[4]; |
| 4404 get_d_register(Vn, src1); |
| 4405 get_d_register(Vm, src2); |
| 4406 for (int i = 0; i < 2; i++) { |
| 4407 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
| 4408 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
| 4409 } |
| 4410 set_d_register(Vd, dst); |
4477 break; | 4411 break; |
4478 case Neon32: | 4412 } |
4479 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min); | 4413 case Neon32: { |
| 4414 int32_t dst[2], src1[2], src2[2]; |
| 4415 get_d_register(Vn, src1); |
| 4416 get_d_register(Vm, src2); |
| 4417 dst[0] = MinMax(src1[0], src1[1], min); |
| 4418 dst[1] = MinMax(src2[0], src2[1], min); |
| 4419 set_d_register(Vd, dst); |
4480 break; | 4420 break; |
| 4421 } |
4481 default: | 4422 default: |
4482 UNREACHABLE(); | 4423 UNREACHABLE(); |
4483 break; | 4424 break; |
4484 } | 4425 } |
4485 break; | 4426 break; |
4486 } | 4427 } |
4487 case 0xd: { | 4428 case 0xd: { |
4488 if (instr->Bit(4) == 0) { | 4429 if (instr->Bit(4) == 0) { |
4489 float src1[4], src2[4]; | 4430 float src1[4], src2[4]; |
4490 get_neon_register(Vn, src1); | 4431 get_q_register(Vn, src1); |
4491 get_neon_register(Vm, src2); | 4432 get_q_register(Vm, src2); |
4492 for (int i = 0; i < 4; i++) { | 4433 for (int i = 0; i < 4; i++) { |
4493 if (instr->Bit(21) == 0) { | 4434 if (instr->Bit(21) == 0) { |
4494 // vadd.f32 Qd, Qm, Qn. | 4435 // vadd.f32 Qd, Qm, Qn. |
4495 src1[i] = src1[i] + src2[i]; | 4436 src1[i] = src1[i] + src2[i]; |
4496 } else { | 4437 } else { |
4497 // vsub.f32 Qd, Qm, Qn. | 4438 // vsub.f32 Qd, Qm, Qn. |
4498 src1[i] = src1[i] - src2[i]; | 4439 src1[i] = src1[i] - src2[i]; |
4499 } | 4440 } |
4500 } | 4441 } |
4501 set_neon_register(Vd, src1); | 4442 set_q_register(Vd, src1); |
4502 } else { | 4443 } else { |
4503 UNIMPLEMENTED(); | 4444 UNIMPLEMENTED(); |
4504 } | 4445 } |
4505 break; | 4446 break; |
4506 } | 4447 } |
4507 case 0xe: { | 4448 case 0xe: { |
4508 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { | 4449 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { |
4509 // vceq.f32. | 4450 // vceq.f32. |
4510 float src1[4], src2[4]; | 4451 float src1[4], src2[4]; |
4511 get_neon_register(Vn, src1); | 4452 get_q_register(Vn, src1); |
4512 get_neon_register(Vm, src2); | 4453 get_q_register(Vm, src2); |
4513 uint32_t dst[4]; | 4454 uint32_t dst[4]; |
4514 for (int i = 0; i < 4; i++) { | 4455 for (int i = 0; i < 4; i++) { |
4515 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; | 4456 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
4516 } | 4457 } |
4517 set_neon_register(Vd, dst); | 4458 set_q_register(Vd, dst); |
4518 } else { | 4459 } else { |
4519 UNIMPLEMENTED(); | 4460 UNIMPLEMENTED(); |
4520 } | 4461 } |
4521 break; | 4462 break; |
4522 } | 4463 } |
4523 case 0xf: { | 4464 case 0xf: { |
4524 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { | 4465 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { |
4525 float src1[4], src2[4]; | 4466 float src1[4], src2[4]; |
4526 get_neon_register(Vn, src1); | 4467 get_q_register(Vn, src1); |
4527 get_neon_register(Vm, src2); | 4468 get_q_register(Vm, src2); |
4528 if (instr->Bit(4) == 1) { | 4469 if (instr->Bit(4) == 1) { |
4529 if (instr->Bit(21) == 0) { | 4470 if (instr->Bit(21) == 0) { |
4530 // vrecps.f32 Qd, Qm, Qn. | 4471 // vrecps.f32 Qd, Qm, Qn. |
4531 for (int i = 0; i < 4; i++) { | 4472 for (int i = 0; i < 4; i++) { |
4532 src1[i] = 2.0f - src1[i] * src2[i]; | 4473 src1[i] = 2.0f - src1[i] * src2[i]; |
4533 } | 4474 } |
4534 } else { | 4475 } else { |
4535 // vrsqrts.f32 Qd, Qm, Qn. | 4476 // vrsqrts.f32 Qd, Qm, Qn. |
4536 for (int i = 0; i < 4; i++) { | 4477 for (int i = 0; i < 4; i++) { |
4537 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; | 4478 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; |
4538 } | 4479 } |
4539 } | 4480 } |
4540 } else { | 4481 } else { |
4541 // vmin/vmax.f32 Qd, Qm, Qn. | 4482 // vmin/vmax.f32 Qd, Qm, Qn. |
4542 bool min = instr->Bit(21) == 1; | 4483 bool min = instr->Bit(21) == 1; |
4543 for (int i = 0; i < 4; i++) { | 4484 for (int i = 0; i < 4; i++) { |
4544 src1[i] = MinMax(src1[i], src2[i], min); | 4485 src1[i] = MinMax(src1[i], src2[i], min); |
4545 } | 4486 } |
4546 } | 4487 } |
4547 set_neon_register(Vd, src1); | 4488 set_q_register(Vd, src1); |
4548 } else { | 4489 } else { |
4549 UNIMPLEMENTED(); | 4490 UNIMPLEMENTED(); |
4550 } | 4491 } |
4551 break; | 4492 break; |
4552 } | 4493 } |
4553 default: | 4494 default: |
4554 UNIMPLEMENTED(); | 4495 UNIMPLEMENTED(); |
4555 break; | 4496 break; |
4556 } | 4497 } |
4557 break; | 4498 break; |
(...skipping 20 matching lines...) Expand all Loading... |
4578 UNIMPLEMENTED(); | 4519 UNIMPLEMENTED(); |
4579 break; | 4520 break; |
4580 } | 4521 } |
4581 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { | 4522 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { |
4582 // vext. | 4523 // vext. |
4583 int imm4 = instr->Bits(11, 8); | 4524 int imm4 = instr->Bits(11, 8); |
4584 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4525 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4585 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4526 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4586 int Vn = instr->VFPNRegValue(kSimd128Precision); | 4527 int Vn = instr->VFPNRegValue(kSimd128Precision); |
4587 uint8_t src1[16], src2[16], dst[16]; | 4528 uint8_t src1[16], src2[16], dst[16]; |
4588 get_neon_register(Vn, src1); | 4529 get_q_register(Vn, src1); |
4589 get_neon_register(Vm, src2); | 4530 get_q_register(Vm, src2); |
4590 int boundary = kSimd128Size - imm4; | 4531 int boundary = kSimd128Size - imm4; |
4591 int i = 0; | 4532 int i = 0; |
4592 for (; i < boundary; i++) { | 4533 for (; i < boundary; i++) { |
4593 dst[i] = src1[i + imm4]; | 4534 dst[i] = src1[i + imm4]; |
4594 } | 4535 } |
4595 for (; i < 16; i++) { | 4536 for (; i < 16; i++) { |
4596 dst[i] = src2[i - boundary]; | 4537 dst[i] = src2[i - boundary]; |
4597 } | 4538 } |
4598 set_neon_register(Vd, dst); | 4539 set_q_register(Vd, dst); |
4599 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { | 4540 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { |
4600 // vshl.i<size> Qd, Qm, shift | 4541 // vshl.i<size> Qd, Qm, shift |
4601 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 4542 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
4602 int shift = instr->Bits(21, 16) - size; | 4543 int shift = instr->Bits(21, 16) - size; |
4603 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4544 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4604 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4545 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4605 NeonSize ns = static_cast<NeonSize>(size / 16); | 4546 NeonSize ns = static_cast<NeonSize>(size / 16); |
4606 switch (ns) { | 4547 switch (ns) { |
4607 case Neon8: | 4548 case Neon8: { |
4608 ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift); | 4549 uint8_t src[16]; |
| 4550 get_q_register(Vm, src); |
| 4551 for (int i = 0; i < 16; i++) { |
| 4552 src[i] <<= shift; |
| 4553 } |
| 4554 set_q_register(Vd, src); |
4609 break; | 4555 break; |
4610 case Neon16: | 4556 } |
4611 ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift); | 4557 case Neon16: { |
| 4558 uint16_t src[8]; |
| 4559 get_q_register(Vm, src); |
| 4560 for (int i = 0; i < 8; i++) { |
| 4561 src[i] <<= shift; |
| 4562 } |
| 4563 set_q_register(Vd, src); |
4612 break; | 4564 break; |
4613 case Neon32: | 4565 } |
4614 ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift); | 4566 case Neon32: { |
| 4567 uint32_t src[4]; |
| 4568 get_q_register(Vm, src); |
| 4569 for (int i = 0; i < 4; i++) { |
| 4570 src[i] <<= shift; |
| 4571 } |
| 4572 set_q_register(Vd, src); |
4615 break; | 4573 break; |
| 4574 } |
4616 default: | 4575 default: |
4617 UNREACHABLE(); | 4576 UNREACHABLE(); |
4618 break; | 4577 break; |
4619 } | 4578 } |
4620 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { | 4579 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { |
4621 // vshr.s<size> Qd, Qm, shift | 4580 // vshr.s<size> Qd, Qm, shift |
4622 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 4581 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
4623 int shift = 2 * size - instr->Bits(21, 16); | 4582 int shift = 2 * size - instr->Bits(21, 16); |
4624 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4583 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4625 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4584 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4626 NeonSize ns = static_cast<NeonSize>(size / 16); | 4585 NeonSize ns = static_cast<NeonSize>(size / 16); |
4627 switch (ns) { | 4586 switch (ns) { |
4628 case Neon8: | 4587 case Neon8: { |
4629 ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift); | 4588 int8_t src[16]; |
| 4589 get_q_register(Vm, src); |
| 4590 for (int i = 0; i < 16; i++) { |
| 4591 src[i] = ArithmeticShiftRight(src[i], shift); |
| 4592 } |
| 4593 set_q_register(Vd, src); |
4630 break; | 4594 break; |
4631 case Neon16: | 4595 } |
4632 ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift); | 4596 case Neon16: { |
| 4597 int16_t src[8]; |
| 4598 get_q_register(Vm, src); |
| 4599 for (int i = 0; i < 8; i++) { |
| 4600 src[i] = ArithmeticShiftRight(src[i], shift); |
| 4601 } |
| 4602 set_q_register(Vd, src); |
4633 break; | 4603 break; |
4634 case Neon32: | 4604 } |
4635 ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift); | 4605 case Neon32: { |
| 4606 int32_t src[4]; |
| 4607 get_q_register(Vm, src); |
| 4608 for (int i = 0; i < 4; i++) { |
| 4609 src[i] = ArithmeticShiftRight(src[i], shift); |
| 4610 } |
| 4611 set_q_register(Vd, src); |
4636 break; | 4612 break; |
| 4613 } |
4637 default: | 4614 default: |
4638 UNREACHABLE(); | 4615 UNREACHABLE(); |
4639 break; | 4616 break; |
4640 } | 4617 } |
4641 } else { | 4618 } else { |
4642 UNIMPLEMENTED(); | 4619 UNIMPLEMENTED(); |
4643 } | 4620 } |
4644 break; | 4621 break; |
4645 case 6: { | 4622 case 6: { |
4646 int Vd, Vm, Vn; | 4623 int Vd, Vm, Vn; |
(...skipping 27 matching lines...) Expand all Loading... |
4674 } | 4651 } |
4675 } else { | 4652 } else { |
4676 UNIMPLEMENTED(); | 4653 UNIMPLEMENTED(); |
4677 } | 4654 } |
4678 break; | 4655 break; |
4679 } | 4656 } |
4680 case 0x1: { | 4657 case 0x1: { |
4681 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { | 4658 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { |
4682 // vbsl.size Qd, Qm, Qn. | 4659 // vbsl.size Qd, Qm, Qn. |
4683 uint32_t dst[4], src1[4], src2[4]; | 4660 uint32_t dst[4], src1[4], src2[4]; |
4684 get_neon_register(Vd, dst); | 4661 get_q_register(Vd, dst); |
4685 get_neon_register(Vn, src1); | 4662 get_q_register(Vn, src1); |
4686 get_neon_register(Vm, src2); | 4663 get_q_register(Vm, src2); |
4687 for (int i = 0; i < 4; i++) { | 4664 for (int i = 0; i < 4; i++) { |
4688 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); | 4665 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
4689 } | 4666 } |
4690 set_neon_register(Vd, dst); | 4667 set_q_register(Vd, dst); |
4691 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { | 4668 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { |
4692 if (instr->Bit(6) == 0) { | 4669 if (instr->Bit(6) == 0) { |
4693 // veor Dd, Dn, Dm | 4670 // veor Dd, Dn, Dm |
4694 uint64_t src1, src2; | 4671 uint64_t src1, src2; |
4695 get_d_register(Vn, &src1); | 4672 get_d_register(Vn, &src1); |
4696 get_d_register(Vm, &src2); | 4673 get_d_register(Vm, &src2); |
4697 src1 ^= src2; | 4674 src1 ^= src2; |
4698 set_d_register(Vd, &src1); | 4675 set_d_register(Vd, &src1); |
4699 | 4676 |
4700 } else { | 4677 } else { |
4701 // veor Qd, Qn, Qm | 4678 // veor Qd, Qn, Qm |
4702 uint32_t src1[4], src2[4]; | 4679 uint32_t src1[4], src2[4]; |
4703 get_neon_register(Vn, src1); | 4680 get_q_register(Vn, src1); |
4704 get_neon_register(Vm, src2); | 4681 get_q_register(Vm, src2); |
4705 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; | 4682 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
4706 set_neon_register(Vd, src1); | 4683 set_q_register(Vd, src1); |
4707 } | 4684 } |
4708 } else { | 4685 } else { |
4709 UNIMPLEMENTED(); | 4686 UNIMPLEMENTED(); |
4710 } | 4687 } |
4711 break; | 4688 break; |
4712 } | 4689 } |
4713 case 0x2: { | 4690 case 0x2: { |
4714 if (instr->Bit(4) == 1) { | 4691 if (instr->Bit(4) == 1) { |
4715 // vqsub.u<size> Qd, Qm, Qn. | 4692 // vqsub.u<size> Qd, Qm, Qn. |
4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4693 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
(...skipping 14 matching lines...) Expand all Loading... |
4731 } else { | 4708 } else { |
4732 UNIMPLEMENTED(); | 4709 UNIMPLEMENTED(); |
4733 } | 4710 } |
4734 break; | 4711 break; |
4735 } | 4712 } |
4736 case 0x3: { | 4713 case 0x3: { |
4737 // vcge/vcgt.u<size> Qd, Qm, Qn. | 4714 // vcge/vcgt.u<size> Qd, Qm, Qn. |
4738 bool ge = instr->Bit(4) == 1; | 4715 bool ge = instr->Bit(4) == 1; |
4739 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4740 switch (size) { | 4717 switch (size) { |
4741 case Neon8: | 4718 case Neon8: { |
4742 CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); | 4719 uint8_t src1[16], src2[16]; |
| 4720 get_q_register(Vn, src1); |
| 4721 get_q_register(Vm, src2); |
| 4722 for (int i = 0; i < 16; i++) { |
| 4723 if (ge) |
| 4724 src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; |
| 4725 else |
| 4726 src1[i] = src1[i] > src2[i] ? 0xFFu : 0; |
| 4727 } |
| 4728 set_q_register(Vd, src1); |
4743 break; | 4729 break; |
4744 case Neon16: | 4730 } |
4745 CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); | 4731 case Neon16: { |
| 4732 uint16_t src1[8], src2[8]; |
| 4733 get_q_register(Vn, src1); |
| 4734 get_q_register(Vm, src2); |
| 4735 for (int i = 0; i < 8; i++) { |
| 4736 if (ge) |
| 4737 src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; |
| 4738 else |
| 4739 src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; |
| 4740 } |
| 4741 set_q_register(Vd, src1); |
4746 break; | 4742 break; |
4747 case Neon32: | 4743 } |
4748 CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); | 4744 case Neon32: { |
| 4745 uint32_t src1[4], src2[4]; |
| 4746 get_q_register(Vn, src1); |
| 4747 get_q_register(Vm, src2); |
| 4748 for (int i = 0; i < 4; i++) { |
| 4749 if (ge) |
| 4750 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
| 4751 else |
| 4752 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
| 4753 } |
| 4754 set_q_register(Vd, src1); |
4749 break; | 4755 break; |
| 4756 } |
4750 default: | 4757 default: |
4751 UNREACHABLE(); | 4758 UNREACHABLE(); |
4752 break; | 4759 break; |
4753 } | 4760 } |
4754 break; | 4761 break; |
4755 } | 4762 } |
4756 case 0x6: { | 4763 case 0x6: { |
4757 // vmin/vmax.u<size> Qd, Qm, Qn. | 4764 // vmin/vmax.u<size> Qd, Qm, Qn. |
4758 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4765 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4759 bool min = instr->Bit(4) != 0; | 4766 bool min = instr->Bit(4) != 0; |
4760 switch (size) { | 4767 switch (size) { |
4761 case Neon8: | 4768 case Neon8: { |
4762 MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min); | 4769 uint8_t src1[16], src2[16]; |
| 4770 get_q_register(Vn, src1); |
| 4771 get_q_register(Vm, src2); |
| 4772 for (int i = 0; i < 16; i++) { |
| 4773 src1[i] = MinMax(src1[i], src2[i], min); |
| 4774 } |
| 4775 set_q_register(Vd, src1); |
4763 break; | 4776 break; |
4764 case Neon16: | 4777 } |
4765 MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min); | 4778 case Neon16: { |
| 4779 uint16_t src1[8], src2[8]; |
| 4780 get_q_register(Vn, src1); |
| 4781 get_q_register(Vm, src2); |
| 4782 for (int i = 0; i < 8; i++) { |
| 4783 src1[i] = MinMax(src1[i], src2[i], min); |
| 4784 } |
| 4785 set_q_register(Vd, src1); |
4766 break; | 4786 break; |
4767 case Neon32: | 4787 } |
4768 MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min); | 4788 case Neon32: { |
| 4789 uint32_t src1[4], src2[4]; |
| 4790 get_q_register(Vn, src1); |
| 4791 get_q_register(Vm, src2); |
| 4792 for (int i = 0; i < 4; i++) { |
| 4793 src1[i] = MinMax(src1[i], src2[i], min); |
| 4794 } |
| 4795 set_q_register(Vd, src1); |
4769 break; | 4796 break; |
| 4797 } |
4770 default: | 4798 default: |
4771 UNREACHABLE(); | 4799 UNREACHABLE(); |
4772 break; | 4800 break; |
4773 } | 4801 } |
4774 break; | 4802 break; |
4775 } | 4803 } |
4776 case 0x8: { | 4804 case 0x8: { |
4777 if (instr->Bit(4) == 0) { | 4805 if (instr->Bit(4) == 0) { |
4778 // vsub.size Qd, Qm, Qn. | 4806 // vsub.size Qd, Qm, Qn. |
4779 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4807 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4780 switch (size) { | 4808 switch (size) { |
4781 case Neon8: | 4809 case Neon8: { |
4782 Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); | 4810 uint8_t src1[16], src2[16]; |
| 4811 get_q_register(Vn, src1); |
| 4812 get_q_register(Vm, src2); |
| 4813 for (int i = 0; i < 16; i++) { |
| 4814 src1[i] -= src2[i]; |
| 4815 } |
| 4816 set_q_register(Vd, src1); |
4783 break; | 4817 break; |
4784 case Neon16: | 4818 } |
4785 Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); | 4819 case Neon16: { |
| 4820 uint16_t src1[8], src2[8]; |
| 4821 get_q_register(Vn, src1); |
| 4822 get_q_register(Vm, src2); |
| 4823 for (int i = 0; i < 8; i++) { |
| 4824 src1[i] -= src2[i]; |
| 4825 } |
| 4826 set_q_register(Vd, src1); |
4786 break; | 4827 break; |
4787 case Neon32: | 4828 } |
4788 Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); | 4829 case Neon32: { |
| 4830 uint32_t src1[4], src2[4]; |
| 4831 get_q_register(Vn, src1); |
| 4832 get_q_register(Vm, src2); |
| 4833 for (int i = 0; i < 4; i++) { |
| 4834 src1[i] -= src2[i]; |
| 4835 } |
| 4836 set_q_register(Vd, src1); |
4789 break; | 4837 break; |
| 4838 } |
4790 default: | 4839 default: |
4791 UNREACHABLE(); | 4840 UNREACHABLE(); |
4792 break; | 4841 break; |
4793 } | 4842 } |
4794 } else { | 4843 } else { |
4795 // vceq.size Qd, Qm, Qn. | 4844 // vceq.size Qd, Qm, Qn. |
4796 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4845 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4797 switch (size) { | 4846 switch (size) { |
4798 case Neon8: | 4847 case Neon8: { |
4799 CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); | 4848 uint8_t src1[16], src2[16]; |
| 4849 get_q_register(Vn, src1); |
| 4850 get_q_register(Vm, src2); |
| 4851 for (int i = 0; i < 16; i++) { |
| 4852 src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; |
| 4853 } |
| 4854 set_q_register(Vd, src1); |
4800 break; | 4855 break; |
4801 case Neon16: | 4856 } |
4802 CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); | 4857 case Neon16: { |
| 4858 uint16_t src1[8], src2[8]; |
| 4859 get_q_register(Vn, src1); |
| 4860 get_q_register(Vm, src2); |
| 4861 for (int i = 0; i < 8; i++) { |
| 4862 src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; |
| 4863 } |
| 4864 set_q_register(Vd, src1); |
4803 break; | 4865 break; |
4804 case Neon32: | 4866 } |
4805 CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); | 4867 case Neon32: { |
| 4868 uint32_t src1[4], src2[4]; |
| 4869 get_q_register(Vn, src1); |
| 4870 get_q_register(Vm, src2); |
| 4871 for (int i = 0; i < 4; i++) { |
| 4872 src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; |
| 4873 } |
| 4874 set_q_register(Vd, src1); |
4806 break; | 4875 break; |
| 4876 } |
4807 default: | 4877 default: |
4808 UNREACHABLE(); | 4878 UNREACHABLE(); |
4809 break; | 4879 break; |
4810 } | 4880 } |
4811 } | 4881 } |
4812 break; | 4882 break; |
4813 } | 4883 } |
4814 case 0xa: { | 4884 case 0xa: { |
4815 // vpmin/vpmax.u<size> Dd, Dm, Dn. | 4885 // vpmin/vpmax.u<size> Dd, Dm, Dn. |
4816 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4886 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
4817 bool min = instr->Bit(4) != 0; | 4887 bool min = instr->Bit(4) != 0; |
4818 switch (size) { | 4888 switch (size) { |
4819 case Neon8: | 4889 case Neon8: { |
4820 PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min); | 4890 uint8_t dst[8], src1[8], src2[8]; |
| 4891 get_d_register(Vn, src1); |
| 4892 get_d_register(Vm, src2); |
| 4893 for (int i = 0; i < 4; i++) { |
| 4894 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
| 4895 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
| 4896 } |
| 4897 set_d_register(Vd, dst); |
4821 break; | 4898 break; |
4822 case Neon16: | 4899 } |
4823 PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min); | 4900 case Neon16: { |
| 4901 uint16_t dst[4], src1[4], src2[4]; |
| 4902 get_d_register(Vn, src1); |
| 4903 get_d_register(Vm, src2); |
| 4904 for (int i = 0; i < 2; i++) { |
| 4905 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
| 4906 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
| 4907 } |
| 4908 set_d_register(Vd, dst); |
4824 break; | 4909 break; |
4825 case Neon32: | 4910 } |
4826 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min); | 4911 case Neon32: { |
| 4912 uint32_t dst[2], src1[2], src2[2]; |
| 4913 get_d_register(Vn, src1); |
| 4914 get_d_register(Vm, src2); |
| 4915 dst[0] = MinMax(src1[0], src1[1], min); |
| 4916 dst[1] = MinMax(src2[0], src2[1], min); |
| 4917 set_d_register(Vd, dst); |
4827 break; | 4918 break; |
| 4919 } |
4828 default: | 4920 default: |
4829 UNREACHABLE(); | 4921 UNREACHABLE(); |
4830 break; | 4922 break; |
4831 } | 4923 } |
4832 break; | 4924 break; |
4833 } | 4925 } |
4834 case 0xd: { | 4926 case 0xd: { |
4835 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { | 4927 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
4836 // vmul.f32 Qd, Qn, Qm | 4928 // vmul.f32 Qd, Qn, Qm |
4837 float src1[4], src2[4]; | 4929 float src1[4], src2[4]; |
4838 get_neon_register(Vn, src1); | 4930 get_q_register(Vn, src1); |
4839 get_neon_register(Vm, src2); | 4931 get_q_register(Vm, src2); |
4840 for (int i = 0; i < 4; i++) { | 4932 for (int i = 0; i < 4; i++) { |
4841 src1[i] = src1[i] * src2[i]; | 4933 src1[i] = src1[i] * src2[i]; |
4842 } | 4934 } |
4843 set_neon_register(Vd, src1); | 4935 set_q_register(Vd, src1); |
4844 } else { | 4936 } else { |
4845 UNIMPLEMENTED(); | 4937 UNIMPLEMENTED(); |
4846 } | 4938 } |
4847 break; | 4939 break; |
4848 } | 4940 } |
4849 case 0xe: { | 4941 case 0xe: { |
4850 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { | 4942 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { |
4851 // vcge/vcgt.f32 Qd, Qm, Qn | 4943 // vcge/vcgt.f32 Qd, Qm, Qn |
4852 bool ge = instr->Bit(21) == 0; | 4944 bool ge = instr->Bit(21) == 0; |
4853 float src1[4], src2[4]; | 4945 float src1[4], src2[4]; |
4854 get_neon_register(Vn, src1); | 4946 get_q_register(Vn, src1); |
4855 get_neon_register(Vm, src2); | 4947 get_q_register(Vm, src2); |
4856 uint32_t dst[4]; | 4948 uint32_t dst[4]; |
4857 for (int i = 0; i < 4; i++) { | 4949 for (int i = 0; i < 4; i++) { |
4858 if (ge) { | 4950 if (ge) { |
4859 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; | 4951 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
4860 } else { | 4952 } else { |
4861 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; | 4953 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
4862 } | 4954 } |
4863 } | 4955 } |
4864 set_neon_register(Vd, dst); | 4956 set_q_register(Vd, dst); |
4865 } else { | 4957 } else { |
4866 UNIMPLEMENTED(); | 4958 UNIMPLEMENTED(); |
4867 } | 4959 } |
4868 break; | 4960 break; |
4869 } | 4961 } |
4870 default: | 4962 default: |
4871 UNREACHABLE(); | 4963 UNREACHABLE(); |
4872 break; | 4964 break; |
4873 } | 4965 } |
4874 break; | 4966 break; |
(...skipping 20 matching lines...) Expand all Loading... |
4895 UNIMPLEMENTED(); | 4987 UNIMPLEMENTED(); |
4896 break; | 4988 break; |
4897 } | 4989 } |
4898 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { | 4990 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { |
4899 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && | 4991 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && |
4900 instr->Bit(6) == 1) { | 4992 instr->Bit(6) == 1) { |
4901 // vcvt.<Td>.<Tm> Qd, Qm. | 4993 // vcvt.<Td>.<Tm> Qd, Qm. |
4902 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4994 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4903 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4995 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4904 uint32_t q_data[4]; | 4996 uint32_t q_data[4]; |
4905 get_neon_register(Vm, q_data); | 4997 get_q_register(Vm, q_data); |
4906 int op = instr->Bits(8, 7); | 4998 int op = instr->Bits(8, 7); |
4907 for (int i = 0; i < 4; i++) { | 4999 for (int i = 0; i < 4; i++) { |
4908 switch (op) { | 5000 switch (op) { |
4909 case 0: | 5001 case 0: |
4910 // f32 <- s32, round towards nearest. | 5002 // f32 <- s32, round towards nearest. |
4911 q_data[i] = bit_cast<uint32_t>(std::round( | 5003 q_data[i] = bit_cast<uint32_t>(std::round( |
4912 static_cast<float>(bit_cast<int32_t>(q_data[i])))); | 5004 static_cast<float>(bit_cast<int32_t>(q_data[i])))); |
4913 break; | 5005 break; |
4914 case 1: | 5006 case 1: |
4915 // f32 <- u32, round towards nearest. | 5007 // f32 <- u32, round towards nearest. |
4916 q_data[i] = bit_cast<uint32_t>( | 5008 q_data[i] = bit_cast<uint32_t>( |
4917 std::round(static_cast<float>(q_data[i]))); | 5009 std::round(static_cast<float>(q_data[i]))); |
4918 break; | 5010 break; |
4919 case 2: | 5011 case 2: |
4920 // s32 <- f32, round to zero. | 5012 // s32 <- f32, round to zero. |
4921 q_data[i] = static_cast<uint32_t>( | 5013 q_data[i] = static_cast<uint32_t>( |
4922 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); | 5014 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); |
4923 break; | 5015 break; |
4924 case 3: | 5016 case 3: |
4925 // u32 <- f32, round to zero. | 5017 // u32 <- f32, round to zero. |
4926 q_data[i] = static_cast<uint32_t>( | 5018 q_data[i] = static_cast<uint32_t>( |
4927 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); | 5019 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); |
4928 break; | 5020 break; |
4929 } | 5021 } |
4930 } | 5022 } |
4931 set_neon_register(Vd, q_data); | 5023 set_q_register(Vd, q_data); |
4932 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { | 5024 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { |
4933 if (instr->Bit(6) == 0) { | 5025 if (instr->Bit(6) == 0) { |
4934 // vswp Dd, Dm. | 5026 // vswp Dd, Dm. |
4935 uint64_t dval, mval; | 5027 uint64_t dval, mval; |
4936 int vd = instr->VFPDRegValue(kDoublePrecision); | 5028 int vd = instr->VFPDRegValue(kDoublePrecision); |
4937 int vm = instr->VFPMRegValue(kDoublePrecision); | 5029 int vm = instr->VFPMRegValue(kDoublePrecision); |
4938 get_d_register(vd, &dval); | 5030 get_d_register(vd, &dval); |
4939 get_d_register(vm, &mval); | 5031 get_d_register(vm, &mval); |
4940 set_d_register(vm, &dval); | 5032 set_d_register(vm, &dval); |
4941 set_d_register(vd, &mval); | 5033 set_d_register(vd, &mval); |
4942 } else { | 5034 } else { |
4943 // vswp Qd, Qm. | 5035 // vswp Qd, Qm. |
4944 uint32_t dval[4], mval[4]; | 5036 uint32_t dval[4], mval[4]; |
4945 int vd = instr->VFPDRegValue(kSimd128Precision); | 5037 int vd = instr->VFPDRegValue(kSimd128Precision); |
4946 int vm = instr->VFPMRegValue(kSimd128Precision); | 5038 int vm = instr->VFPMRegValue(kSimd128Precision); |
4947 get_neon_register(vd, dval); | 5039 get_q_register(vd, dval); |
4948 get_neon_register(vm, mval); | 5040 get_q_register(vm, mval); |
4949 set_neon_register(vm, dval); | 5041 set_q_register(vm, dval); |
4950 set_neon_register(vd, mval); | 5042 set_q_register(vd, mval); |
4951 } | 5043 } |
4952 } else if (instr->Bits(11, 7) == 0x18) { | 5044 } else if (instr->Bits(11, 7) == 0x18) { |
4953 // vdup.32 Qd, Sm. | 5045 // vdup.32 Qd, Sm. |
4954 int vd = instr->VFPDRegValue(kSimd128Precision); | 5046 int vd = instr->VFPDRegValue(kSimd128Precision); |
4955 int vm = instr->VFPMRegValue(kDoublePrecision); | 5047 int vm = instr->VFPMRegValue(kDoublePrecision); |
4956 int index = instr->Bit(19); | 5048 int index = instr->Bit(19); |
4957 uint32_t s_data = get_s_register(vm * 2 + index); | 5049 uint32_t s_data = get_s_register(vm * 2 + index); |
4958 uint32_t q_data[4]; | 5050 uint32_t q_data[4]; |
4959 for (int i = 0; i < 4; i++) q_data[i] = s_data; | 5051 for (int i = 0; i < 4; i++) q_data[i] = s_data; |
4960 set_neon_register(vd, q_data); | 5052 set_q_register(vd, q_data); |
4961 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { | 5053 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { |
4962 // vmvn Qd, Qm. | 5054 // vmvn Qd, Qm. |
4963 int vd = instr->VFPDRegValue(kSimd128Precision); | 5055 int vd = instr->VFPDRegValue(kSimd128Precision); |
4964 int vm = instr->VFPMRegValue(kSimd128Precision); | 5056 int vm = instr->VFPMRegValue(kSimd128Precision); |
4965 uint32_t q_data[4]; | 5057 uint32_t q_data[4]; |
4966 get_neon_register(vm, q_data); | 5058 get_q_register(vm, q_data); |
4967 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; | 5059 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
4968 set_neon_register(vd, q_data); | 5060 set_q_register(vd, q_data); |
4969 } else if (instr->Bits(11, 10) == 0x2) { | 5061 } else if (instr->Bits(11, 10) == 0x2) { |
4970 // vtb[l,x] Dd, <list>, Dm. | 5062 // vtb[l,x] Dd, <list>, Dm. |
4971 int vd = instr->VFPDRegValue(kDoublePrecision); | 5063 int vd = instr->VFPDRegValue(kDoublePrecision); |
4972 int vn = instr->VFPNRegValue(kDoublePrecision); | 5064 int vn = instr->VFPNRegValue(kDoublePrecision); |
4973 int vm = instr->VFPMRegValue(kDoublePrecision); | 5065 int vm = instr->VFPMRegValue(kDoublePrecision); |
4974 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; | 5066 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
4975 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx | 5067 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
4976 uint64_t destination = 0, indices = 0, result = 0; | 5068 uint64_t destination = 0, indices = 0, result = 0; |
4977 get_d_register(vd, &destination); | 5069 get_d_register(vd, &destination); |
4978 get_d_register(vm, &indices); | 5070 get_d_register(vm, &indices); |
4979 for (int i = 0; i < kDoubleSize; i++) { | 5071 for (int i = 0; i < kDoubleSize; i++) { |
4980 int shift = i * kBitsPerByte; | 5072 int shift = i * kBitsPerByte; |
4981 int index = (indices >> shift) & 0xFF; | 5073 int index = (indices >> shift) & 0xFF; |
4982 if (index < table_len) { | 5074 if (index < table_len) { |
4983 uint64_t table; | 5075 uint64_t table; |
4984 get_d_register(vn + index / kDoubleSize, &table); | 5076 get_d_register(vn + index / kDoubleSize, &table); |
4985 result |= | 5077 result |= |
4986 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) | 5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
4987 << shift; | 5079 << shift; |
4988 } else if (vtbx) { | 5080 } else if (vtbx) { |
4989 result |= destination & (0xFFull << shift); | 5081 result |= destination & (0xFFull << shift); |
4990 } | 5082 } |
4991 } | 5083 } |
4992 set_d_register(vd, &result); | 5084 set_d_register(vd, &result); |
4993 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) { | 5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && |
| 5086 instr->Bit(6) == 1) { |
4994 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5087 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
4995 if (instr->Bit(6) == 0) { | 5088 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4996 int Vd = instr->VFPDRegValue(kDoublePrecision); | 5089 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4997 int Vm = instr->VFPMRegValue(kDoublePrecision); | 5090 if (instr->Bit(7) == 1) { |
4998 if (instr->Bit(7) == 1) { | 5091 // vzip.<size> Qd, Qm. |
4999 // vzip.<size> Dd, Dm. | 5092 switch (size) { |
5000 switch (size) { | 5093 case Neon8: { |
5001 case Neon8: | 5094 uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
5002 Zip<uint8_t, kDoubleSize>(this, Vd, Vm); | 5095 get_q_register(Vd, src1); |
5003 break; | 5096 get_q_register(Vm, src2); |
5004 case Neon16: | 5097 for (int i = 0; i < 8; i++) { |
5005 Zip<uint16_t, kDoubleSize>(this, Vd, Vm); | 5098 dst1[i * 2] = src1[i]; |
5006 break; | 5099 dst1[i * 2 + 1] = src2[i]; |
5007 case Neon32: | 5100 dst2[i * 2] = src1[i + 8]; |
5008 Zip<uint32_t, kDoubleSize>(this, Vd, Vm); | 5101 dst2[i * 2 + 1] = src2[i + 8]; |
5009 break; | 5102 } |
5010 default: | 5103 set_q_register(Vd, dst1); |
5011 UNREACHABLE(); | 5104 set_q_register(Vm, dst2); |
5012 break; | 5105 break; |
5013 } | 5106 } |
5014 } else { | 5107 case Neon16: { |
5015 // vuzp.<size> Dd, Dm. | 5108 uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
5016 switch (size) { | 5109 get_q_register(Vd, src1); |
5017 case Neon8: | 5110 get_q_register(Vm, src2); |
5018 Unzip<uint8_t, kDoubleSize>(this, Vd, Vm); | 5111 for (int i = 0; i < 4; i++) { |
5019 break; | 5112 dst1[i * 2] = src1[i]; |
5020 case Neon16: | 5113 dst1[i * 2 + 1] = src2[i]; |
5021 Unzip<uint16_t, kDoubleSize>(this, Vd, Vm); | 5114 dst2[i * 2] = src1[i + 4]; |
5022 break; | 5115 dst2[i * 2 + 1] = src2[i + 4]; |
5023 case Neon32: | 5116 } |
5024 Unzip<uint32_t, kDoubleSize>(this, Vd, Vm); | 5117 set_q_register(Vd, dst1); |
5025 break; | 5118 set_q_register(Vm, dst2); |
5026 default: | 5119 break; |
5027 UNREACHABLE(); | |
5028 break; | |
5029 } | 5120 } |
| 5121 case Neon32: { |
| 5122 uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
| 5123 get_q_register(Vd, src1); |
| 5124 get_q_register(Vm, src2); |
| 5125 for (int i = 0; i < 2; i++) { |
| 5126 dst1[i * 2] = src1[i]; |
| 5127 dst1[i * 2 + 1] = src2[i]; |
| 5128 dst2[i * 2] = src1[i + 2]; |
| 5129 dst2[i * 2 + 1] = src2[i + 2]; |
| 5130 } |
| 5131 set_q_register(Vd, dst1); |
| 5132 set_q_register(Vm, dst2); |
| 5133 break; |
| 5134 } |
| 5135 default: |
| 5136 UNREACHABLE(); |
| 5137 break; |
5030 } | 5138 } |
5031 } else { | 5139 } else { |
5032 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5140 // vuzp.<size> Qd, Qm. |
5033 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5141 switch (size) { |
5034 if (instr->Bit(7) == 1) { | 5142 case Neon8: { |
5035 // vzip.<size> Qd, Qm. | 5143 uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
5036 switch (size) { | 5144 get_q_register(Vd, src1); |
5037 case Neon8: | 5145 get_q_register(Vm, src2); |
5038 Zip<uint8_t, kSimd128Size>(this, Vd, Vm); | 5146 for (int i = 0; i < 8; i++) { |
5039 break; | 5147 dst1[i] = src1[i * 2]; |
5040 case Neon16: | 5148 dst1[i + 8] = src2[i * 2]; |
5041 Zip<uint16_t, kSimd128Size>(this, Vd, Vm); | 5149 dst2[i] = src1[i * 2 + 1]; |
5042 break; | 5150 dst2[i + 8] = src2[i * 2 + 1]; |
5043 case Neon32: | 5151 } |
5044 Zip<uint32_t, kSimd128Size>(this, Vd, Vm); | 5152 set_q_register(Vd, dst1); |
5045 break; | 5153 set_q_register(Vm, dst2); |
5046 default: | 5154 break; |
5047 UNREACHABLE(); | |
5048 break; | |
5049 } | 5155 } |
5050 } else { | 5156 case Neon16: { |
5051 // vuzp.<size> Qd, Qm. | 5157 uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
5052 switch (size) { | 5158 get_q_register(Vd, src1); |
5053 case Neon8: | 5159 get_q_register(Vm, src2); |
5054 Unzip<uint8_t, kSimd128Size>(this, Vd, Vm); | 5160 for (int i = 0; i < 4; i++) { |
5055 break; | 5161 dst1[i] = src1[i * 2]; |
5056 case Neon16: | 5162 dst1[i + 4] = src2[i * 2]; |
5057 Unzip<uint16_t, kSimd128Size>(this, Vd, Vm); | 5163 dst2[i] = src1[i * 2 + 1]; |
5058 break; | 5164 dst2[i + 4] = src2[i * 2 + 1]; |
5059 case Neon32: | 5165 } |
5060 Unzip<uint32_t, kSimd128Size>(this, Vd, Vm); | 5166 set_q_register(Vd, dst1); |
5061 break; | 5167 set_q_register(Vm, dst2); |
5062 default: | 5168 break; |
5063 UNREACHABLE(); | |
5064 break; | |
5065 } | 5169 } |
| 5170 case Neon32: { |
| 5171 uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
| 5172 get_q_register(Vd, src1); |
| 5173 get_q_register(Vm, src2); |
| 5174 for (int i = 0; i < 2; i++) { |
| 5175 dst1[i] = src1[i * 2]; |
| 5176 dst1[i + 2] = src2[i * 2]; |
| 5177 dst2[i] = src1[i * 2 + 1]; |
| 5178 dst2[i + 2] = src2[i * 2 + 1]; |
| 5179 } |
| 5180 set_q_register(Vd, dst1); |
| 5181 set_q_register(Vm, dst2); |
| 5182 break; |
| 5183 } |
| 5184 default: |
| 5185 UNREACHABLE(); |
| 5186 break; |
5066 } | 5187 } |
5067 } | 5188 } |
5068 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { | 5189 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
5069 // vrev<op>.size Qd, Qm | 5190 // vrev<op>.size Qd, Qm |
5070 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5191 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5071 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5192 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5072 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5193 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5073 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - | 5194 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - |
5074 instr->Bits(8, 7)); | 5195 instr->Bits(8, 7)); |
5075 switch (op) { | 5196 switch (op) { |
5076 case Neon16: { | 5197 case Neon16: { |
5077 DCHECK_EQ(Neon8, size); | 5198 DCHECK_EQ(Neon8, size); |
5078 uint8_t src[16]; | 5199 uint8_t src[16]; |
5079 get_neon_register(Vm, src); | 5200 get_q_register(Vm, src); |
5080 for (int i = 0; i < 16; i += 2) { | 5201 for (int i = 0; i < 16; i += 2) { |
5081 std::swap(src[i], src[i + 1]); | 5202 std::swap(src[i], src[i + 1]); |
5082 } | 5203 } |
5083 set_neon_register(Vd, src); | 5204 set_q_register(Vd, src); |
5084 break; | 5205 break; |
5085 } | 5206 } |
5086 case Neon32: { | 5207 case Neon32: { |
5087 switch (size) { | 5208 switch (size) { |
5088 case Neon16: { | 5209 case Neon16: { |
5089 uint16_t src[8]; | 5210 uint16_t src[8]; |
5090 get_neon_register(Vm, src); | 5211 get_q_register(Vm, src); |
5091 for (int i = 0; i < 8; i += 2) { | 5212 for (int i = 0; i < 8; i += 2) { |
5092 std::swap(src[i], src[i + 1]); | 5213 std::swap(src[i], src[i + 1]); |
5093 } | 5214 } |
5094 set_neon_register(Vd, src); | 5215 set_q_register(Vd, src); |
5095 break; | 5216 break; |
5096 } | 5217 } |
5097 case Neon8: { | 5218 case Neon8: { |
5098 uint8_t src[16]; | 5219 uint8_t src[16]; |
5099 get_neon_register(Vm, src); | 5220 get_q_register(Vm, src); |
5100 for (int i = 0; i < 4; i++) { | 5221 for (int i = 0; i < 4; i++) { |
5101 std::swap(src[i * 4], src[i * 4 + 3]); | 5222 std::swap(src[i * 4], src[i * 4 + 3]); |
5102 std::swap(src[i * 4 + 1], src[i * 4 + 2]); | 5223 std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
5103 } | 5224 } |
5104 set_neon_register(Vd, src); | 5225 set_q_register(Vd, src); |
5105 break; | 5226 break; |
5106 } | 5227 } |
5107 default: | 5228 default: |
5108 UNREACHABLE(); | 5229 UNREACHABLE(); |
5109 break; | 5230 break; |
5110 } | 5231 } |
5111 break; | 5232 break; |
5112 } | 5233 } |
5113 case Neon64: { | 5234 case Neon64: { |
5114 switch (size) { | 5235 switch (size) { |
5115 case Neon32: { | 5236 case Neon32: { |
5116 uint32_t src[4]; | 5237 uint32_t src[4]; |
5117 get_neon_register(Vm, src); | 5238 get_q_register(Vm, src); |
5118 std::swap(src[0], src[1]); | 5239 std::swap(src[0], src[1]); |
5119 std::swap(src[2], src[3]); | 5240 std::swap(src[2], src[3]); |
5120 set_neon_register(Vd, src); | 5241 set_q_register(Vd, src); |
5121 break; | 5242 break; |
5122 } | 5243 } |
5123 case Neon16: { | 5244 case Neon16: { |
5124 uint16_t src[8]; | 5245 uint16_t src[8]; |
5125 get_neon_register(Vm, src); | 5246 get_q_register(Vm, src); |
5126 for (int i = 0; i < 4; i++) { | 5247 for (int i = 0; i < 4; i++) { |
5127 std::swap(src[i * 4], src[i * 4 + 3]); | 5248 std::swap(src[i * 4], src[i * 4 + 3]); |
5128 std::swap(src[i * 4 + 1], src[i * 4 + 2]); | 5249 std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
5129 } | 5250 } |
5130 set_neon_register(Vd, src); | 5251 set_q_register(Vd, src); |
5131 break; | 5252 break; |
5132 } | 5253 } |
5133 case Neon8: { | 5254 case Neon8: { |
5134 uint8_t src[16]; | 5255 uint8_t src[16]; |
5135 get_neon_register(Vm, src); | 5256 get_q_register(Vm, src); |
5136 for (int i = 0; i < 4; i++) { | 5257 for (int i = 0; i < 4; i++) { |
5137 std::swap(src[i], src[7 - i]); | 5258 std::swap(src[i], src[7 - i]); |
5138 std::swap(src[i + 8], src[15 - i]); | 5259 std::swap(src[i + 8], src[15 - i]); |
5139 } | 5260 } |
5140 set_neon_register(Vd, src); | 5261 set_q_register(Vd, src); |
5141 break; | 5262 break; |
5142 } | 5263 } |
5143 default: | 5264 default: |
5144 UNREACHABLE(); | 5265 UNREACHABLE(); |
5145 break; | 5266 break; |
5146 } | 5267 } |
5147 break; | 5268 break; |
5148 } | 5269 } |
5149 default: | 5270 default: |
5150 UNREACHABLE(); | 5271 UNREACHABLE(); |
5151 break; | 5272 break; |
5152 } | 5273 } |
5153 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) { | 5274 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { |
| 5275 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 5276 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5154 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5277 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5155 if (instr->Bit(6) == 0) { | 5278 // vtrn.<size> Qd, Qm. |
5156 int Vd = instr->VFPDRegValue(kDoublePrecision); | 5279 switch (size) { |
5157 int Vm = instr->VFPMRegValue(kDoublePrecision); | 5280 case Neon8: { |
5158 // vtrn.<size> Dd, Dm. | 5281 uint8_t src[16], dst[16]; |
5159 switch (size) { | 5282 get_q_register(Vd, dst); |
5160 case Neon8: | 5283 get_q_register(Vm, src); |
5161 Transpose<uint8_t, kDoubleSize>(this, Vd, Vm); | 5284 for (int i = 0; i < 8; i++) { |
5162 break; | 5285 std::swap(dst[2 * i + 1], src[2 * i]); |
5163 case Neon16: | 5286 } |
5164 Transpose<uint16_t, kDoubleSize>(this, Vd, Vm); | 5287 set_q_register(Vd, dst); |
5165 break; | 5288 set_q_register(Vm, src); |
5166 case Neon32: | 5289 break; |
5167 Transpose<uint32_t, kDoubleSize>(this, Vd, Vm); | |
5168 break; | |
5169 default: | |
5170 UNREACHABLE(); | |
5171 break; | |
5172 } | 5290 } |
5173 } else { | 5291 case Neon16: { |
5174 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5292 uint16_t src[8], dst[8]; |
5175 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5293 get_q_register(Vd, dst); |
5176 // vtrn.<size> Qd, Qm. | 5294 get_q_register(Vm, src); |
5177 switch (size) { | 5295 for (int i = 0; i < 4; i++) { |
5178 case Neon8: | 5296 std::swap(dst[2 * i + 1], src[2 * i]); |
5179 Transpose<uint8_t, kSimd128Size>(this, Vd, Vm); | 5297 } |
5180 break; | 5298 set_q_register(Vd, dst); |
5181 case Neon16: | 5299 set_q_register(Vm, src); |
5182 Transpose<uint16_t, kSimd128Size>(this, Vd, Vm); | 5300 break; |
5183 break; | |
5184 case Neon32: | |
5185 Transpose<uint32_t, kSimd128Size>(this, Vd, Vm); | |
5186 break; | |
5187 default: | |
5188 UNREACHABLE(); | |
5189 break; | |
5190 } | 5301 } |
| 5302 case Neon32: { |
| 5303 uint32_t src[4], dst[4]; |
| 5304 get_q_register(Vd, dst); |
| 5305 get_q_register(Vm, src); |
| 5306 for (int i = 0; i < 2; i++) { |
| 5307 std::swap(dst[2 * i + 1], src[2 * i]); |
| 5308 } |
| 5309 set_q_register(Vd, dst); |
| 5310 set_q_register(Vm, src); |
| 5311 break; |
| 5312 } |
| 5313 default: |
| 5314 UNREACHABLE(); |
| 5315 break; |
5191 } | 5316 } |
5192 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { | 5317 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
5193 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5318 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5194 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5319 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5195 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5320 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5196 if (instr->Bits(9, 6) == 0xd) { | 5321 if (instr->Bits(9, 6) == 0xd) { |
5197 // vabs<type>.<size> Qd, Qm | 5322 // vabs<type>.<size> Qd, Qm |
5198 if (instr->Bit(10) != 0) { | 5323 if (instr->Bit(10) != 0) { |
5199 // floating point (clear sign bits) | 5324 // floating point (clear sign bits) |
5200 uint32_t src[4]; | 5325 uint32_t src[4]; |
5201 get_neon_register(Vm, src); | 5326 get_q_register(Vm, src); |
5202 for (int i = 0; i < 4; i++) { | 5327 for (int i = 0; i < 4; i++) { |
5203 src[i] &= ~0x80000000; | 5328 src[i] &= ~0x80000000; |
5204 } | 5329 } |
5205 set_neon_register(Vd, src); | 5330 set_q_register(Vd, src); |
5206 } else { | 5331 } else { |
5207 // signed integer | 5332 // signed integer |
5208 switch (size) { | 5333 switch (size) { |
5209 case Neon8: | 5334 case Neon8: { |
5210 Abs<int8_t, kSimd128Size>(this, Vd, Vm); | 5335 int8_t src[16]; |
| 5336 get_q_register(Vm, src); |
| 5337 for (int i = 0; i < 16; i++) { |
| 5338 src[i] = std::abs(src[i]); |
| 5339 } |
| 5340 set_q_register(Vd, src); |
5211 break; | 5341 break; |
5212 case Neon16: | 5342 } |
5213 Abs<int16_t, kSimd128Size>(this, Vd, Vm); | 5343 case Neon16: { |
| 5344 int16_t src[8]; |
| 5345 get_q_register(Vm, src); |
| 5346 for (int i = 0; i < 8; i++) { |
| 5347 src[i] = std::abs(src[i]); |
| 5348 } |
| 5349 set_q_register(Vd, src); |
5214 break; | 5350 break; |
5215 case Neon32: | 5351 } |
5216 Abs<int32_t, kSimd128Size>(this, Vd, Vm); | 5352 case Neon32: { |
| 5353 int32_t src[4]; |
| 5354 get_q_register(Vm, src); |
| 5355 for (int i = 0; i < 4; i++) { |
| 5356 src[i] = std::abs(src[i]); |
| 5357 } |
| 5358 set_q_register(Vd, src); |
5217 break; | 5359 break; |
| 5360 } |
5218 default: | 5361 default: |
5219 UNIMPLEMENTED(); | 5362 UNIMPLEMENTED(); |
5220 break; | 5363 break; |
5221 } | 5364 } |
5222 } | 5365 } |
5223 } else if (instr->Bits(9, 6) == 0xf) { | 5366 } else if (instr->Bits(9, 6) == 0xf) { |
5224 // vneg<type>.<size> Qd, Qm (signed integer) | 5367 // vneg<type>.<size> Qd, Qm (signed integer) |
5225 if (instr->Bit(10) != 0) { | 5368 if (instr->Bit(10) != 0) { |
5226 // floating point (toggle sign bits) | 5369 // floating point (toggle sign bits) |
5227 uint32_t src[4]; | 5370 uint32_t src[4]; |
5228 get_neon_register(Vm, src); | 5371 get_q_register(Vm, src); |
5229 for (int i = 0; i < 4; i++) { | 5372 for (int i = 0; i < 4; i++) { |
5230 src[i] ^= 0x80000000; | 5373 src[i] ^= 0x80000000; |
5231 } | 5374 } |
5232 set_neon_register(Vd, src); | 5375 set_q_register(Vd, src); |
5233 } else { | 5376 } else { |
5234 // signed integer | 5377 // signed integer |
5235 switch (size) { | 5378 switch (size) { |
5236 case Neon8: | 5379 case Neon8: { |
5237 Neg<int8_t, kSimd128Size>(this, Vd, Vm); | 5380 int8_t src[16]; |
| 5381 get_q_register(Vm, src); |
| 5382 for (int i = 0; i < 16; i++) { |
| 5383 src[i] = -src[i]; |
| 5384 } |
| 5385 set_q_register(Vd, src); |
5238 break; | 5386 break; |
| 5387 } |
5239 case Neon16: | 5388 case Neon16: |
5240 Neg<int16_t, kSimd128Size>(this, Vd, Vm); | 5389 int16_t src[8]; |
| 5390 get_q_register(Vm, src); |
| 5391 for (int i = 0; i < 8; i++) { |
| 5392 src[i] = -src[i]; |
| 5393 } |
| 5394 set_q_register(Vd, src); |
5241 break; | 5395 break; |
5242 case Neon32: | 5396 case Neon32: { |
5243 Neg<int32_t, kSimd128Size>(this, Vd, Vm); | 5397 int32_t src[4]; |
| 5398 get_q_register(Vm, src); |
| 5399 for (int i = 0; i < 4; i++) { |
| 5400 src[i] = -src[i]; |
| 5401 } |
| 5402 set_q_register(Vd, src); |
5244 break; | 5403 break; |
| 5404 } |
5245 default: | 5405 default: |
5246 UNIMPLEMENTED(); | 5406 UNIMPLEMENTED(); |
5247 break; | 5407 break; |
5248 } | 5408 } |
5249 } | 5409 } |
5250 } else { | 5410 } else { |
5251 UNIMPLEMENTED(); | 5411 UNIMPLEMENTED(); |
5252 } | 5412 } |
5253 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { | 5413 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { |
5254 // vrecpe/vrsqrte.f32 Qd, Qm. | 5414 // vrecpe/vrsqrte.f32 Qd, Qm. |
5255 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5415 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5256 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5416 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5257 uint32_t src[4]; | 5417 uint32_t src[4]; |
5258 get_neon_register(Vm, src); | 5418 get_q_register(Vm, src); |
5259 if (instr->Bit(7) == 0) { | 5419 if (instr->Bit(7) == 0) { |
5260 for (int i = 0; i < 4; i++) { | 5420 for (int i = 0; i < 4; i++) { |
5261 float denom = bit_cast<float>(src[i]); | 5421 float denom = bit_cast<float>(src[i]); |
5262 div_zero_vfp_flag_ = (denom == 0); | 5422 div_zero_vfp_flag_ = (denom == 0); |
5263 float result = 1.0f / denom; | 5423 float result = 1.0f / denom; |
5264 result = canonicalizeNaN(result); | 5424 result = canonicalizeNaN(result); |
5265 src[i] = bit_cast<uint32_t>(result); | 5425 src[i] = bit_cast<uint32_t>(result); |
5266 } | 5426 } |
5267 } else { | 5427 } else { |
5268 lazily_initialize_fast_sqrt(isolate_); | 5428 lazily_initialize_fast_sqrt(isolate_); |
5269 for (int i = 0; i < 4; i++) { | 5429 for (int i = 0; i < 4; i++) { |
5270 float radicand = bit_cast<float>(src[i]); | 5430 float radicand = bit_cast<float>(src[i]); |
5271 float result = 1.0f / fast_sqrt(radicand, isolate_); | 5431 float result = 1.0f / fast_sqrt(radicand, isolate_); |
5272 result = canonicalizeNaN(result); | 5432 result = canonicalizeNaN(result); |
5273 src[i] = bit_cast<uint32_t>(result); | 5433 src[i] = bit_cast<uint32_t>(result); |
5274 } | 5434 } |
5275 } | 5435 } |
5276 set_neon_register(Vd, src); | 5436 set_q_register(Vd, src); |
5277 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && | 5437 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && |
5278 instr->Bits(7, 6) != 0) { | 5438 instr->Bits(7, 6) != 0) { |
5279 // vqmovn.<type><size> Dd, Qm. | 5439 // vqmovn.<type><size> Dd, Qm. |
5280 int Vd = instr->VFPDRegValue(kDoublePrecision); | 5440 int Vd = instr->VFPDRegValue(kDoublePrecision); |
5281 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5441 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5282 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5442 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5283 bool is_unsigned = instr->Bit(6) != 0; | 5443 bool is_unsigned = instr->Bit(6) != 0; |
5284 switch (size) { | 5444 switch (size) { |
5285 case Neon8: { | 5445 case Neon8: { |
5286 if (is_unsigned) { | 5446 if (is_unsigned) { |
(...skipping 27 matching lines...) Expand all Loading... |
5314 UNIMPLEMENTED(); | 5474 UNIMPLEMENTED(); |
5315 } | 5475 } |
5316 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { | 5476 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { |
5317 // vshr.u<size> Qd, Qm, shift | 5477 // vshr.u<size> Qd, Qm, shift |
5318 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); | 5478 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
5319 int shift = 2 * size - instr->Bits(21, 16); | 5479 int shift = 2 * size - instr->Bits(21, 16); |
5320 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5480 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5321 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5481 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5322 NeonSize ns = static_cast<NeonSize>(size / 16); | 5482 NeonSize ns = static_cast<NeonSize>(size / 16); |
5323 switch (ns) { | 5483 switch (ns) { |
5324 case Neon8: | 5484 case Neon8: { |
5325 ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift); | 5485 uint8_t src[16]; |
| 5486 get_q_register(Vm, src); |
| 5487 for (int i = 0; i < 16; i++) { |
| 5488 src[i] >>= shift; |
| 5489 } |
| 5490 set_q_register(Vd, src); |
5326 break; | 5491 break; |
5327 case Neon16: | 5492 } |
5328 ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift); | 5493 case Neon16: { |
| 5494 uint16_t src[8]; |
| 5495 get_q_register(Vm, src); |
| 5496 for (int i = 0; i < 8; i++) { |
| 5497 src[i] >>= shift; |
| 5498 } |
| 5499 set_q_register(Vd, src); |
5329 break; | 5500 break; |
5330 case Neon32: | 5501 } |
5331 ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift); | 5502 case Neon32: { |
| 5503 uint32_t src[4]; |
| 5504 get_q_register(Vm, src); |
| 5505 for (int i = 0; i < 4; i++) { |
| 5506 src[i] >>= shift; |
| 5507 } |
| 5508 set_q_register(Vd, src); |
5332 break; | 5509 break; |
| 5510 } |
5333 default: | 5511 default: |
5334 UNREACHABLE(); | 5512 UNREACHABLE(); |
5335 break; | 5513 break; |
5336 } | 5514 } |
5337 } else { | 5515 } else { |
5338 UNIMPLEMENTED(); | 5516 UNIMPLEMENTED(); |
5339 } | 5517 } |
5340 break; | 5518 break; |
5341 case 8: | 5519 case 8: |
5342 if (instr->Bits(21, 20) == 0) { | 5520 if (instr->Bits(21, 20) == 0) { |
(...skipping 701 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6044 processor->prev_ = nullptr; | 6222 processor->prev_ = nullptr; |
6045 processor->next_ = nullptr; | 6223 processor->next_ = nullptr; |
6046 } | 6224 } |
6047 | 6225 |
6048 } // namespace internal | 6226 } // namespace internal |
6049 } // namespace v8 | 6227 } // namespace v8 |
6050 | 6228 |
6051 #endif // USE_SIMULATOR | 6229 #endif // USE_SIMULATOR |
6052 | 6230 |
6053 #endif // V8_TARGET_ARCH_ARM | 6231 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |