Chromium Code Reviews

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2810703003: Revert of [ARM] Implement D-register versions of vzip, vuzp, and vtrn. (Closed)
Patch Set: Created 3 years, 8 months ago
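The core of the revert shows in the hunk at line 899: the size-templated get_neon_register / set_neon_register accessors (left, reverted column) are swapped back for the separate get_d_register / get_q_register template pairs (right column). Below is a minimal standalone sketch of the two styles, not the Simulator class itself; the flat uint32_t backing array mirrors the diff, while kNumDRegisters, the defaulted SIZE = kSimd128Size template argument, and the toy main() are assumptions for illustration (the real declarations live in src/arm/simulator-arm.h).

// Standalone sketch, not V8's Simulator: contrasts the two accessor styles.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

constexpr int kDoubleSize = 8;     // bytes in a D register
constexpr int kSimd128Size = 16;   // bytes in a Q register
constexpr int kNumDRegisters = 32; // assumption for the sketch

// 32 D registers (aliasing 16 Q registers), backed by uint32_t words.
static uint32_t vfp_registers_[kNumDRegisters * 2];

// Restored style: one accessor pair per register width.
template <typename T>
void get_d_register(int dreg, T* value) {
  assert(dreg >= 0 && dreg < kNumDRegisters);
  memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize);
}

template <typename T>
void set_q_register(int qreg, const T* value) {
  assert(qreg >= 0 && qreg < kNumDRegisters / 2);
  memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
}

// Reverted style: a single accessor whose width is the SIZE template argument;
// the array-reference parameter ties the caller's buffer length to SIZE.
// The SIZE = kSimd128Size default is an assumption based on the call sites.
template <typename T, int SIZE = kSimd128Size>
void get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) {
  static_assert(SIZE == kSimd128Size || SIZE == kDoubleSize, "bad SIZE");
  assert(reg >= 0);
  assert(reg < (SIZE == kSimd128Size ? kNumDRegisters / 2 : kNumDRegisters));
  memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE);
}

int main() {
  const uint32_t lanes[4] = {0x11111111u, 0x22222222u, 0x33333333u, 0x44444444u};
  set_q_register(0, lanes);  // write q0 (aliases d0/d1)

  uint32_t q[4], d[2];
  get_neon_register(0, q);   // whole q0 via the templated accessor
  get_d_register(1, d);      // upper half of q0 read back as d1
  printf("q0 = %08x %08x %08x %08x\n", q[0], q[1], q[2], q[3]);
  printf("d1 = %08x %08x\n", d[0], d[1]);
  return 0;
}

Both styles end up as a memcpy against the same backing words; the templated form only folds the D/Q distinction into one entry point, which is exactly what this revert undoes.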
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 878 matching lines...)
889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); 889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); 890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2);
891 } 891 }
892 892
893 893
894 void Simulator::set_d_register(int dreg, const uint32_t* value) { 894 void Simulator::set_d_register(int dreg, const uint32_t* value) {
895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); 895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); 896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
897 } 897 }
898 898
899 template <typename T, int SIZE> 899 template <typename T>
900 void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) { 900 void Simulator::get_d_register(int dreg, T* value) {
901 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); 901 DCHECK((dreg >= 0) && (dreg < num_d_registers));
902 DCHECK_LE(0, reg); 902 memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize);
903 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
904 memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE);
905 } 903 }
906 904
907 template <typename T, int SIZE> 905 template <typename T>
908 void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) { 906 void Simulator::set_d_register(int dreg, const T* value) {
909 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); 907 DCHECK((dreg >= 0) && (dreg < num_d_registers));
910 DCHECK_LE(0, reg); 908 memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize);
911 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); 909 }
912 memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE); 910
911 template <typename T>
912 void Simulator::get_q_register(int qreg, T* value) {
913 DCHECK((qreg >= 0) && (qreg < num_q_registers));
914 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size);
915 }
916
917 template <typename T>
918 void Simulator::set_q_register(int qreg, const T* value) {
919 DCHECK((qreg >= 0) && (qreg < num_q_registers));
920 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
913 } 921 }
914 922
915 // Raw access to the PC register. 923 // Raw access to the PC register.
916 void Simulator::set_pc(int32_t value) { 924 void Simulator::set_pc(int32_t value) {
917 pc_modified_ = true; 925 pc_modified_ = true;
918 registers_[pc] = value; 926 registers_[pc] = value;
919 } 927 }
920 928
921 929
922 bool Simulator::has_bad_pc() const { 930 bool Simulator::has_bad_pc() const {
(...skipping 2570 matching lines...)
3493 case Neon32: { 3501 case Neon32: {
3494 for (int i = 0; i < 4; i++) { 3502 for (int i = 0; i < 4; i++) {
3495 q_data[i] = rt_value; 3503 q_data[i] = rt_value;
3496 } 3504 }
3497 break; 3505 break;
3498 } 3506 }
3499 default: 3507 default:
3500 UNREACHABLE(); 3508 UNREACHABLE();
3501 break; 3509 break;
3502 } 3510 }
3503 set_neon_register(vd, q_data); 3511 set_q_register(vd, q_data);
3504 } 3512 }
3505 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { 3513 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
3506 // vmov (scalar to ARM core register) 3514 // vmov (scalar to ARM core register)
3507 int vn = instr->VFPNRegValue(kDoublePrecision); 3515 int vn = instr->VFPNRegValue(kDoublePrecision);
3508 int rt = instr->RtValue(); 3516 int rt = instr->RtValue();
3509 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); 3517 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
3510 uint64_t data; 3518 uint64_t data;
3511 get_d_register(vn, &data); 3519 get_d_register(vn, &data);
3512 if ((opc1_opc2 & 0xb) == 0) { 3520 if ((opc1_opc2 & 0xb) == 0) {
3513 // NeonS32 / NeonU32 3521 // NeonS32 / NeonU32
(...skipping 466 matching lines...)
3980 break; 3988 break;
3981 default: 3989 default:
3982 UNIMPLEMENTED(); // Not used by V8. 3990 UNIMPLEMENTED(); // Not used by V8.
3983 } 3991 }
3984 } else { 3992 } else {
3985 UNIMPLEMENTED(); // Not used by V8. 3993 UNIMPLEMENTED(); // Not used by V8.
3986 } 3994 }
3987 } 3995 }
3988 3996
3989 // Templated operations for NEON instructions. 3997 // Templated operations for NEON instructions.
3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
3990 template <typename T, typename U> 3999 template <typename T, typename U>
3991 U Widen(T value) { 4000 U Widen(T value) {
3992 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); 4001 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
3993 static_assert(sizeof(U) > sizeof(T), "T must smaller than U"); 4002 static_assert(sizeof(U) > sizeof(T), "T must smaller than U");
3994 return static_cast<U>(value); 4003 return static_cast<U>(value);
3995 } 4004 }
3996 4005
3997 template <typename T, typename U> 4006 template <typename T, typename U>
3998 U Narrow(T value) { 4007 U Narrow(T value) {
3999 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); 4008 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger");
4000 static_assert(sizeof(U) < sizeof(T), "T must larger than U"); 4009 static_assert(sizeof(U) < sizeof(T), "T must larger than U");
4001 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), 4010 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(),
4002 "Signed-ness of T and U must match"); 4011 "Signed-ness of T and U must match");
4003 // Make sure value can be expressed in the smaller type; otherwise, the 4012 // Make sure value can be expressed in the smaller type; otherwise, the
4004 // casted result is implementation defined. 4013 // casted result is implementation defined.
4005 DCHECK_LE(std::numeric_limits<T>::min(), value); 4014 DCHECK_LE(std::numeric_limits<T>::min(), value);
4006 DCHECK_GE(std::numeric_limits<T>::max(), value); 4015 DCHECK_GE(std::numeric_limits<T>::max(), value);
4007 return static_cast<U>(value); 4016 return static_cast<U>(value);
4008 } 4017 }
4009 4018
4010 template <typename T> 4019 template <typename T>
4011 T Clamp(int64_t value) { 4020 T Clamp(int64_t value) {
4012 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); 4021 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
4013 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); 4022 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min());
4014 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); 4023 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max());
4015 int64_t clamped = std::max(min, std::min(max, value)); 4024 int64_t clamped = std::max(min, std::min(max, value));
4016 return static_cast<T>(clamped); 4025 return static_cast<T>(clamped);
4017 } 4026 }
4018 4027
4028 template <typename T>
4029 T MinMax(T a, T b, bool is_min) {
4030 return is_min ? std::min(a, b) : std::max(a, b);
4031 }
4032
4019 template <typename T, typename U> 4033 template <typename T, typename U>
4020 void Widen(Simulator* simulator, int Vd, int Vm) { 4034 void Widen(Simulator* simulator, int Vd, int Vm) {
4021 static const int kLanes = 8 / sizeof(T); 4035 static const int kLanes = 8 / sizeof(T);
4022 T src[kLanes]; 4036 T src[kLanes];
4023 U dst[kLanes]; 4037 U dst[kLanes];
4024 simulator->get_neon_register<T, kDoubleSize>(Vm, src); 4038 simulator->get_d_register(Vm, src);
4025 for (int i = 0; i < kLanes; i++) { 4039 for (int i = 0; i < kLanes; i++) {
4026 dst[i] = Widen<T, U>(src[i]); 4040 dst[i] = Widen<T, U>(src[i]);
4027 } 4041 }
4028 simulator->set_neon_register(Vd, dst); 4042 simulator->set_q_register(Vd, dst);
4029 }
4030
4031 template <typename T, int SIZE>
4032 void Abs(Simulator* simulator, int Vd, int Vm) {
4033 static const int kElems = SIZE / sizeof(T);
4034 T src[kElems];
4035 simulator->get_neon_register<T, SIZE>(Vm, src);
4036 for (int i = 0; i < kElems; i++) {
4037 src[i] = std::abs(src[i]);
4038 }
4039 simulator->set_neon_register<T, SIZE>(Vd, src);
4040 }
4041
4042 template <typename T, int SIZE>
4043 void Neg(Simulator* simulator, int Vd, int Vm) {
4044 static const int kElems = SIZE / sizeof(T);
4045 T src[kElems];
4046 simulator->get_neon_register<T, SIZE>(Vm, src);
4047 for (int i = 0; i < kElems; i++) {
4048 src[i] = -src[i];
4049 }
4050 simulator->set_neon_register<T, SIZE>(Vd, src);
4051 } 4043 }
4052 4044
4053 template <typename T, typename U> 4045 template <typename T, typename U>
4054 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { 4046 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) {
4055 static const int kLanes = 16 / sizeof(T); 4047 static const int kLanes = 16 / sizeof(T);
4056 T src[kLanes]; 4048 T src[kLanes];
4057 U dst[kLanes]; 4049 U dst[kLanes];
4058 simulator->get_neon_register(Vm, src); 4050 simulator->get_q_register(Vm, src);
4059 for (int i = 0; i < kLanes; i++) { 4051 for (int i = 0; i < kLanes; i++) {
4060 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); 4052 dst[i] = Narrow<T, U>(Clamp<U>(src[i]));
4061 } 4053 }
4062 simulator->set_neon_register<U, kDoubleSize>(Vd, dst); 4054 simulator->set_d_register(Vd, dst);
4063 } 4055 }
4064 4056
4065 template <typename T> 4057 template <typename T>
4066 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4058 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4067 static const int kLanes = 16 / sizeof(T); 4059 static const int kLanes = 16 / sizeof(T);
4068 T src1[kLanes], src2[kLanes]; 4060 T src1[kLanes], src2[kLanes];
4069 simulator->get_neon_register(Vn, src1); 4061 simulator->get_q_register(Vn, src1);
4070 simulator->get_neon_register(Vm, src2); 4062 simulator->get_q_register(Vm, src2);
4071 for (int i = 0; i < kLanes; i++) { 4063 for (int i = 0; i < kLanes; i++) {
4072 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); 4064 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i]));
4073 } 4065 }
4074 simulator->set_neon_register(Vd, src1); 4066 simulator->set_q_register(Vd, src1);
4075 } 4067 }
4076 4068
4077 template <typename T> 4069 template <typename T>
4078 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4070 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4079 static const int kLanes = 16 / sizeof(T); 4071 static const int kLanes = 16 / sizeof(T);
4080 T src1[kLanes], src2[kLanes]; 4072 T src1[kLanes], src2[kLanes];
4081 simulator->get_neon_register(Vn, src1); 4073 simulator->get_q_register(Vn, src1);
4082 simulator->get_neon_register(Vm, src2); 4074 simulator->get_q_register(Vm, src2);
4083 for (int i = 0; i < kLanes; i++) { 4075 for (int i = 0; i < kLanes; i++) {
4084 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); 4076 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));
4085 } 4077 }
4086 simulator->set_neon_register(Vd, src1); 4078 simulator->set_q_register(Vd, src1);
4087 }
4088
4089 template <typename T, int SIZE>
4090 void Zip(Simulator* simulator, int Vd, int Vm) {
4091 static const int kElems = SIZE / sizeof(T);
4092 static const int kPairs = kElems / 2;
4093 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
4094 simulator->get_neon_register<T, SIZE>(Vd, src1);
4095 simulator->get_neon_register<T, SIZE>(Vm, src2);
4096 for (int i = 0; i < kPairs; i++) {
4097 dst1[i * 2] = src1[i];
4098 dst1[i * 2 + 1] = src2[i];
4099 dst2[i * 2] = src1[i + kPairs];
4100 dst2[i * 2 + 1] = src2[i + kPairs];
4101 }
4102 simulator->set_neon_register<T, SIZE>(Vd, dst1);
4103 simulator->set_neon_register<T, SIZE>(Vm, dst2);
4104 }
4105
4106 template <typename T, int SIZE>
4107 void Unzip(Simulator* simulator, int Vd, int Vm) {
4108 static const int kElems = SIZE / sizeof(T);
4109 static const int kPairs = kElems / 2;
4110 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
4111 simulator->get_neon_register<T, SIZE>(Vd, src1);
4112 simulator->get_neon_register<T, SIZE>(Vm, src2);
4113 for (int i = 0; i < kPairs; i++) {
4114 dst1[i] = src1[i * 2];
4115 dst1[i + kPairs] = src2[i * 2];
4116 dst2[i] = src1[i * 2 + 1];
4117 dst2[i + kPairs] = src2[i * 2 + 1];
4118 }
4119 simulator->set_neon_register<T, SIZE>(Vd, dst1);
4120 simulator->set_neon_register<T, SIZE>(Vm, dst2);
4121 }
4122
4123 template <typename T, int SIZE>
4124 void Transpose(Simulator* simulator, int Vd, int Vm) {
4125 static const int kElems = SIZE / sizeof(T);
4126 static const int kPairs = kElems / 2;
4127 T src1[kElems], src2[kElems];
4128 simulator->get_neon_register<T, SIZE>(Vd, src1);
4129 simulator->get_neon_register<T, SIZE>(Vm, src2);
4130 for (int i = 0; i < kPairs; i++) {
4131 std::swap(src1[2 * i + 1], src2[2 * i]);
4132 }
4133 simulator->set_neon_register<T, SIZE>(Vd, src1);
4134 simulator->set_neon_register<T, SIZE>(Vm, src2);
4135 }
4136
4137 template <typename T, int SIZE>
4138 void Test(Simulator* simulator, int Vd, int Vm, int Vn) {
4139 static const int kElems = SIZE / sizeof(T);
4140 T src1[kElems], src2[kElems];
4141 simulator->get_neon_register<T, SIZE>(Vn, src1);
4142 simulator->get_neon_register<T, SIZE>(Vm, src2);
4143 for (int i = 0; i < kElems; i++) {
4144 src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0;
4145 }
4146 simulator->set_neon_register<T, SIZE>(Vd, src1);
4147 }
4148
4149 template <typename T, int SIZE>
4150 void Add(Simulator* simulator, int Vd, int Vm, int Vn) {
4151 static const int kElems = SIZE / sizeof(T);
4152 T src1[kElems], src2[kElems];
4153 simulator->get_neon_register<T, SIZE>(Vn, src1);
4154 simulator->get_neon_register<T, SIZE>(Vm, src2);
4155 for (int i = 0; i < kElems; i++) {
4156 src1[i] += src2[i];
4157 }
4158 simulator->set_neon_register<T, SIZE>(Vd, src1);
4159 }
4160
4161 template <typename T, int SIZE>
4162 void Sub(Simulator* simulator, int Vd, int Vm, int Vn) {
4163 static const int kElems = SIZE / sizeof(T);
4164 T src1[kElems], src2[kElems];
4165 simulator->get_neon_register<T, SIZE>(Vn, src1);
4166 simulator->get_neon_register<T, SIZE>(Vm, src2);
4167 for (int i = 0; i < kElems; i++) {
4168 src1[i] -= src2[i];
4169 }
4170 simulator->set_neon_register<T, SIZE>(Vd, src1);
4171 }
4172
4173 template <typename T, int SIZE>
4174 void Mul(Simulator* simulator, int Vd, int Vm, int Vn) {
4175 static const int kElems = SIZE / sizeof(T);
4176 T src1[kElems], src2[kElems];
4177 simulator->get_neon_register<T, SIZE>(Vn, src1);
4178 simulator->get_neon_register<T, SIZE>(Vm, src2);
4179 for (int i = 0; i < kElems; i++) {
4180 src1[i] *= src2[i];
4181 }
4182 simulator->set_neon_register<T, SIZE>(Vd, src1);
4183 }
4184
4185 template <typename T, int SIZE>
4186 void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) {
4187 static const int kElems = SIZE / sizeof(T);
4188 T src[kElems];
4189 simulator->get_neon_register<T, SIZE>(Vm, src);
4190 for (int i = 0; i < kElems; i++) {
4191 src[i] <<= shift;
4192 }
4193 simulator->set_neon_register<T, SIZE>(Vd, src);
4194 }
4195
4196 template <typename T, int SIZE>
4197 void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
4198 static const int kElems = SIZE / sizeof(T);
4199 T src[kElems];
4200 simulator->get_neon_register<T, SIZE>(Vm, src);
4201 for (int i = 0; i < kElems; i++) {
4202 src[i] >>= shift;
4203 }
4204 simulator->set_neon_register<T, SIZE>(Vd, src);
4205 }
4206
4207 template <typename T, int SIZE>
4208 void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
4209 static const int kElems = SIZE / sizeof(T);
4210 T src[kElems];
4211 simulator->get_neon_register<T, SIZE>(Vm, src);
4212 for (int i = 0; i < kElems; i++) {
4213 src[i] = ArithmeticShiftRight(src[i], shift);
4214 }
4215 simulator->set_neon_register<T, SIZE>(Vd, src);
4216 }
4217
4218 template <typename T, int SIZE>
4219 void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) {
4220 static const int kElems = SIZE / sizeof(T);
4221 T src1[kElems], src2[kElems];
4222 simulator->get_neon_register<T, SIZE>(Vn, src1);
4223 simulator->get_neon_register<T, SIZE>(Vm, src2);
4224 for (int i = 0; i < kElems; i++) {
4225 src1[i] = src1[i] == src2[i] ? -1 : 0;
4226 }
4227 simulator->set_neon_register<T, SIZE>(Vd, src1);
4228 }
4229
4230 template <typename T, int SIZE>
4231 void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) {
4232 static const int kElems = SIZE / sizeof(T);
4233 T src1[kElems], src2[kElems];
4234 simulator->get_neon_register<T, SIZE>(Vn, src1);
4235 simulator->get_neon_register<T, SIZE>(Vm, src2);
4236 for (int i = 0; i < kElems; i++) {
4237 if (ge)
4238 src1[i] = src1[i] >= src2[i] ? -1 : 0;
4239 else
4240 src1[i] = src1[i] > src2[i] ? -1 : 0;
4241 }
4242 simulator->set_neon_register<T, SIZE>(Vd, src1);
4243 }
4244
4245 template <typename T>
4246 T MinMax(T a, T b, bool is_min) {
4247 return is_min ? std::min(a, b) : std::max(a, b);
4248 }
4249
4250 template <typename T, int SIZE>
4251 void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
4252 static const int kElems = SIZE / sizeof(T);
4253 T src1[kElems], src2[kElems];
4254 simulator->get_neon_register<T, SIZE>(Vn, src1);
4255 simulator->get_neon_register<T, SIZE>(Vm, src2);
4256 for (int i = 0; i < kElems; i++) {
4257 src1[i] = MinMax(src1[i], src2[i], min);
4258 }
4259 simulator->set_neon_register<T, SIZE>(Vd, src1);
4260 }
4261
4262 template <typename T>
4263 void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
4264 static const int kElems = kDoubleSize / sizeof(T);
4265 static const int kPairs = kElems / 2;
4266 T dst[kElems], src1[kElems], src2[kElems];
4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
4269 for (int i = 0; i < kPairs; i++) {
4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4272 }
4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
4274 } 4079 }
4275 4080
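The Zip, Unzip, and Transpose templates above appear only in the left column, i.e. they are removed by this revert; they encode the lane shuffles behind vzip, vuzp, and vtrn. A small standalone sketch of the same index arithmetic on plain 8-lane arrays follows; the helper names and the example values are illustrative only and not part of the CL.

// Standalone sketch of the vzip/vuzp/vtrn lane shuffles on D-sized vectors.
#include <algorithm>
#include <cstdint>
#include <cstdio>

constexpr int kLanes = 8;          // 8 x uint8_t lanes in a 64-bit D register
constexpr int kPairs = kLanes / 2;

// Mirrors the Zip template: interleave a and b in place (vzip.8 Dd, Dm).
void zip(uint8_t (&a)[kLanes], uint8_t (&b)[kLanes]) {
  uint8_t d1[kLanes], d2[kLanes];
  for (int i = 0; i < kPairs; i++) {
    d1[i * 2] = a[i];
    d1[i * 2 + 1] = b[i];
    d2[i * 2] = a[i + kPairs];
    d2[i * 2 + 1] = b[i + kPairs];
  }
  std::copy(d1, d1 + kLanes, a);
  std::copy(d2, d2 + kLanes, b);
}

// Mirrors the Unzip template: de-interleave even/odd lanes (vuzp.8 Dd, Dm).
void unzip(uint8_t (&a)[kLanes], uint8_t (&b)[kLanes]) {
  uint8_t d1[kLanes], d2[kLanes];
  for (int i = 0; i < kPairs; i++) {
    d1[i] = a[i * 2];
    d1[i + kPairs] = b[i * 2];
    d2[i] = a[i * 2 + 1];
    d2[i + kPairs] = b[i * 2 + 1];
  }
  std::copy(d1, d1 + kLanes, a);
  std::copy(d2, d2 + kLanes, b);
}

// Mirrors the Transpose template: swap odd lanes of a with even lanes of b
// (vtrn.8 Dd, Dm), a 2x2 block transpose across the register pair.
void transpose(uint8_t (&a)[kLanes], uint8_t (&b)[kLanes]) {
  for (int i = 0; i < kPairs; i++) {
    std::swap(a[2 * i + 1], b[2 * i]);
  }
}

void print(const char* tag, const uint8_t (&v)[kLanes]) {
  printf("%s:", tag);
  for (int i = 0; i < kLanes; i++) printf(" %2d", v[i]);
  printf("\n");
}

int main() {
  uint8_t a[kLanes] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint8_t b[kLanes] = {10, 11, 12, 13, 14, 15, 16, 17};
  zip(a, b);
  print("after vzip a", a);  // 0 10 1 11 2 12 3 13
  print("after vzip b", b);  // 4 14 5 15 6 16 7 17
  unzip(a, b);               // vuzp undoes vzip for this pair
  print("after vuzp a", a);
  print("after vuzp b", b);
  transpose(a, b);
  print("after vtrn a", a);  // 0 10 2 12 4 14 6 16
  print("after vtrn b", b);  // 1 11 3 13 5 15 7 17
  return 0;
}

Zip interleaves the low halves of the pair into the first register and the high halves into the second; Unzip is its inverse for a matched pair; Transpose swaps the odd lanes of the first register with the even lanes of the second.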
4276 void Simulator::DecodeSpecialCondition(Instruction* instr) { 4081 void Simulator::DecodeSpecialCondition(Instruction* instr) {
4277 switch (instr->SpecialValue()) { 4082 switch (instr->SpecialValue()) {
4278 case 4: { 4083 case 4: {
4279 int Vd, Vm, Vn; 4084 int Vd, Vm, Vn;
4280 if (instr->Bit(6) == 0) { 4085 if (instr->Bit(6) == 0) {
4281 Vd = instr->VFPDRegValue(kDoublePrecision); 4086 Vd = instr->VFPDRegValue(kDoublePrecision);
4282 Vm = instr->VFPMRegValue(kDoublePrecision); 4087 Vm = instr->VFPMRegValue(kDoublePrecision);
4283 Vn = instr->VFPNRegValue(kDoublePrecision); 4088 Vn = instr->VFPNRegValue(kDoublePrecision);
(...skipping 25 matching lines...)
4309 UNIMPLEMENTED(); 4114 UNIMPLEMENTED();
4310 } 4115 }
4311 break; 4116 break;
4312 } 4117 }
4313 case 0x1: { 4118 case 0x1: {
4314 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && 4119 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
4315 instr->Bit(4) == 1) { 4120 instr->Bit(4) == 1) {
4316 // vmov Qd, Qm. 4121 // vmov Qd, Qm.
4317 // vorr, Qd, Qm, Qn. 4122 // vorr, Qd, Qm, Qn.
4318 uint32_t src1[4]; 4123 uint32_t src1[4];
4319 get_neon_register(Vm, src1); 4124 get_q_register(Vm, src1);
4320 if (Vm != Vn) { 4125 if (Vm != Vn) {
4321 uint32_t src2[4]; 4126 uint32_t src2[4];
4322 get_neon_register(Vn, src2); 4127 get_q_register(Vn, src2);
4323 for (int i = 0; i < 4; i++) { 4128 for (int i = 0; i < 4; i++) {
4324 src1[i] = src1[i] | src2[i]; 4129 src1[i] = src1[i] | src2[i];
4325 } 4130 }
4326 } 4131 }
4327 set_neon_register(Vd, src1); 4132 set_q_register(Vd, src1);
4328 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && 4133 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
4329 instr->Bit(4) == 1) { 4134 instr->Bit(4) == 1) {
4330 // vand Qd, Qm, Qn. 4135 // vand Qd, Qm, Qn.
4331 uint32_t src1[4], src2[4]; 4136 uint32_t src1[4], src2[4];
4332 get_neon_register(Vn, src1); 4137 get_q_register(Vn, src1);
4333 get_neon_register(Vm, src2); 4138 get_q_register(Vm, src2);
4334 for (int i = 0; i < 4; i++) { 4139 for (int i = 0; i < 4; i++) {
4335 src1[i] = src1[i] & src2[i]; 4140 src1[i] = src1[i] & src2[i];
4336 } 4141 }
4337 set_neon_register(Vd, src1); 4142 set_q_register(Vd, src1);
4338 } else { 4143 } else {
4339 UNIMPLEMENTED(); 4144 UNIMPLEMENTED();
4340 } 4145 }
4341 break; 4146 break;
4342 } 4147 }
4343 case 0x2: { 4148 case 0x2: {
4344 if (instr->Bit(4) == 1) { 4149 if (instr->Bit(4) == 1) {
4345 // vqsub.s<size> Qd, Qm, Qn. 4150 // vqsub.s<size> Qd, Qm, Qn.
4346 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4151 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4347 switch (size) { 4152 switch (size) {
(...skipping 13 matching lines...)
4361 } else { 4166 } else {
4362 UNIMPLEMENTED(); 4167 UNIMPLEMENTED();
4363 } 4168 }
4364 break; 4169 break;
4365 } 4170 }
4366 case 0x3: { 4171 case 0x3: {
4367 // vcge/vcgt.s<size> Qd, Qm, Qn. 4172 // vcge/vcgt.s<size> Qd, Qm, Qn.
4368 bool ge = instr->Bit(4) == 1; 4173 bool ge = instr->Bit(4) == 1;
4369 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4174 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4370 switch (size) { 4175 switch (size) {
4371 case Neon8: 4176 case Neon8: {
4372 CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); 4177 int8_t src1[16], src2[16];
4178 get_q_register(Vn, src1);
4179 get_q_register(Vm, src2);
4180 for (int i = 0; i < 16; i++) {
4181 if (ge)
4182 src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
4183 else
4184 src1[i] = src1[i] > src2[i] ? 0xFF : 0;
4185 }
4186 set_q_register(Vd, src1);
4373 break; 4187 break;
4374 case Neon16: 4188 }
4375 CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); 4189 case Neon16: {
4190 int16_t src1[8], src2[8];
4191 get_q_register(Vn, src1);
4192 get_q_register(Vm, src2);
4193 for (int i = 0; i < 8; i++) {
4194 if (ge)
4195 src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
4196 else
4197 src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
4198 }
4199 set_q_register(Vd, src1);
4376 break; 4200 break;
4377 case Neon32: 4201 }
4378 CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); 4202 case Neon32: {
4203 int32_t src1[4], src2[4];
4204 get_q_register(Vn, src1);
4205 get_q_register(Vm, src2);
4206 for (int i = 0; i < 4; i++) {
4207 if (ge)
4208 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
4209 else
4210 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
4211 }
4212 set_q_register(Vd, src1);
4379 break; 4213 break;
4214 }
4380 default: 4215 default:
4381 UNREACHABLE(); 4216 UNREACHABLE();
4382 break; 4217 break;
4383 } 4218 }
4384 break; 4219 break;
4385 } 4220 }
4386 case 0x6: { 4221 case 0x6: {
4387 // vmin/vmax.s<size> Qd, Qm, Qn. 4222 // vmin/vmax.s<size> Qd, Qm, Qn.
4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4223 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4389 bool min = instr->Bit(4) != 0; 4224 bool min = instr->Bit(4) != 0;
4390 switch (size) { 4225 switch (size) {
4391 case Neon8: 4226 case Neon8: {
4392 MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min); 4227 int8_t src1[16], src2[16];
4228 get_q_register(Vn, src1);
4229 get_q_register(Vm, src2);
4230 for (int i = 0; i < 16; i++) {
4231 src1[i] = MinMax(src1[i], src2[i], min);
4232 }
4233 set_q_register(Vd, src1);
4393 break; 4234 break;
4394 case Neon16: 4235 }
4395 MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min); 4236 case Neon16: {
4237 int16_t src1[8], src2[8];
4238 get_q_register(Vn, src1);
4239 get_q_register(Vm, src2);
4240 for (int i = 0; i < 8; i++) {
4241 src1[i] = MinMax(src1[i], src2[i], min);
4242 }
4243 set_q_register(Vd, src1);
4396 break; 4244 break;
4397 case Neon32: 4245 }
4398 MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min); 4246 case Neon32: {
4247 int32_t src1[4], src2[4];
4248 get_q_register(Vn, src1);
4249 get_q_register(Vm, src2);
4250 for (int i = 0; i < 4; i++) {
4251 src1[i] = MinMax(src1[i], src2[i], min);
4252 }
4253 set_q_register(Vd, src1);
4399 break; 4254 break;
4255 }
4400 default: 4256 default:
4401 UNREACHABLE(); 4257 UNREACHABLE();
4402 break; 4258 break;
4403 } 4259 }
4404 break; 4260 break;
4405 } 4261 }
4406 case 0x8: { 4262 case 0x8: {
4407 // vadd/vtst 4263 // vadd/vtst
4408 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4264 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4409 if (instr->Bit(4) == 0) { 4265 if (instr->Bit(4) == 0) {
4410 // vadd.i<size> Qd, Qm, Qn. 4266 // vadd.i<size> Qd, Qm, Qn.
4411 switch (size) { 4267 switch (size) {
4412 case Neon8: 4268 case Neon8: {
4413 Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); 4269 uint8_t src1[16], src2[16];
4270 get_q_register(Vn, src1);
4271 get_q_register(Vm, src2);
4272 for (int i = 0; i < 16; i++) {
4273 src1[i] += src2[i];
4274 }
4275 set_q_register(Vd, src1);
4414 break; 4276 break;
4415 case Neon16: 4277 }
4416 Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); 4278 case Neon16: {
4279 uint16_t src1[8], src2[8];
4280 get_q_register(Vn, src1);
4281 get_q_register(Vm, src2);
4282 for (int i = 0; i < 8; i++) {
4283 src1[i] += src2[i];
4284 }
4285 set_q_register(Vd, src1);
4417 break; 4286 break;
4418 case Neon32: 4287 }
4419 Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); 4288 case Neon32: {
4289 uint32_t src1[4], src2[4];
4290 get_q_register(Vn, src1);
4291 get_q_register(Vm, src2);
4292 for (int i = 0; i < 4; i++) {
4293 src1[i] += src2[i];
4294 }
4295 set_q_register(Vd, src1);
4420 break; 4296 break;
4297 }
4421 default: 4298 default:
4422 UNREACHABLE(); 4299 UNREACHABLE();
4423 break; 4300 break;
4424 } 4301 }
4425 } else { 4302 } else {
4426 // vtst.i<size> Qd, Qm, Qn. 4303 // vtst.i<size> Qd, Qm, Qn.
4427 switch (size) { 4304 switch (size) {
4428 case Neon8: 4305 case Neon8: {
4429 Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); 4306 uint8_t src1[16], src2[16];
4307 get_q_register(Vn, src1);
4308 get_q_register(Vm, src2);
4309 for (int i = 0; i < 16; i++) {
4310 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
4311 }
4312 set_q_register(Vd, src1);
4430 break; 4313 break;
4431 case Neon16: 4314 }
4432 Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); 4315 case Neon16: {
4316 uint16_t src1[8], src2[8];
4317 get_q_register(Vn, src1);
4318 get_q_register(Vm, src2);
4319 for (int i = 0; i < 8; i++) {
4320 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
4321 }
4322 set_q_register(Vd, src1);
4433 break; 4323 break;
4434 case Neon32: 4324 }
4435 Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); 4325 case Neon32: {
4326 uint32_t src1[4], src2[4];
4327 get_q_register(Vn, src1);
4328 get_q_register(Vm, src2);
4329 for (int i = 0; i < 4; i++) {
4330 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
4331 }
4332 set_q_register(Vd, src1);
4436 break; 4333 break;
4334 }
4437 default: 4335 default:
4438 UNREACHABLE(); 4336 UNREACHABLE();
4439 break; 4337 break;
4440 } 4338 }
4441 } 4339 }
4442 break; 4340 break;
4443 } 4341 }
4444 case 0x9: { 4342 case 0x9: {
4445 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4343 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4446 // vmul.i<size> Qd, Qm, Qn. 4344 // vmul.i<size> Qd, Qm, Qn.
4447 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4345 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4448 switch (size) { 4346 switch (size) {
4449 case Neon8: 4347 case Neon8: {
4450 Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); 4348 uint8_t src1[16], src2[16];
4349 get_q_register(Vn, src1);
4350 get_q_register(Vm, src2);
4351 for (int i = 0; i < 16; i++) {
4352 src1[i] *= src2[i];
4353 }
4354 set_q_register(Vd, src1);
4451 break; 4355 break;
4452 case Neon16: 4356 }
4453 Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); 4357 case Neon16: {
4358 uint16_t src1[8], src2[8];
4359 get_q_register(Vn, src1);
4360 get_q_register(Vm, src2);
4361 for (int i = 0; i < 8; i++) {
4362 src1[i] *= src2[i];
4363 }
4364 set_q_register(Vd, src1);
4454 break; 4365 break;
4455 case Neon32: 4366 }
4456 Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); 4367 case Neon32: {
4368 uint32_t src1[4], src2[4];
4369 get_q_register(Vn, src1);
4370 get_q_register(Vm, src2);
4371 for (int i = 0; i < 4; i++) {
4372 src1[i] *= src2[i];
4373 }
4374 set_q_register(Vd, src1);
4457 break; 4375 break;
4376 }
4458 default: 4377 default:
4459 UNREACHABLE(); 4378 UNREACHABLE();
4460 break; 4379 break;
4461 } 4380 }
4462 } else { 4381 } else {
4463 UNIMPLEMENTED(); 4382 UNIMPLEMENTED();
4464 } 4383 }
4465 break; 4384 break;
4466 } 4385 }
4467 case 0xa: { 4386 case 0xa: {
4468 // vpmin/vpmax.s<size> Dd, Dm, Dn. 4387 // vpmin/vpmax.s<size> Dd, Dm, Dn.
4469 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4470 bool min = instr->Bit(4) != 0; 4389 bool min = instr->Bit(4) != 0;
4471 switch (size) { 4390 switch (size) {
4472 case Neon8: 4391 case Neon8: {
4473 PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min); 4392 int8_t dst[8], src1[8], src2[8];
4393 get_d_register(Vn, src1);
4394 get_d_register(Vm, src2);
4395 for (int i = 0; i < 4; i++) {
4396 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4397 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4398 }
4399 set_d_register(Vd, dst);
4474 break; 4400 break;
4475 case Neon16: 4401 }
4476 PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min); 4402 case Neon16: {
4403 int16_t dst[4], src1[4], src2[4];
4404 get_d_register(Vn, src1);
4405 get_d_register(Vm, src2);
4406 for (int i = 0; i < 2; i++) {
4407 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4408 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4409 }
4410 set_d_register(Vd, dst);
4477 break; 4411 break;
4478 case Neon32: 4412 }
4479 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min); 4413 case Neon32: {
4414 int32_t dst[2], src1[2], src2[2];
4415 get_d_register(Vn, src1);
4416 get_d_register(Vm, src2);
4417 dst[0] = MinMax(src1[0], src1[1], min);
4418 dst[1] = MinMax(src2[0], src2[1], min);
4419 set_d_register(Vd, dst);
4480 break; 4420 break;
4421 }
4481 default: 4422 default:
4482 UNREACHABLE(); 4423 UNREACHABLE();
4483 break; 4424 break;
4484 } 4425 }
4485 break; 4426 break;
4486 } 4427 }
4487 case 0xd: { 4428 case 0xd: {
4488 if (instr->Bit(4) == 0) { 4429 if (instr->Bit(4) == 0) {
4489 float src1[4], src2[4]; 4430 float src1[4], src2[4];
4490 get_neon_register(Vn, src1); 4431 get_q_register(Vn, src1);
4491 get_neon_register(Vm, src2); 4432 get_q_register(Vm, src2);
4492 for (int i = 0; i < 4; i++) { 4433 for (int i = 0; i < 4; i++) {
4493 if (instr->Bit(21) == 0) { 4434 if (instr->Bit(21) == 0) {
4494 // vadd.f32 Qd, Qm, Qn. 4435 // vadd.f32 Qd, Qm, Qn.
4495 src1[i] = src1[i] + src2[i]; 4436 src1[i] = src1[i] + src2[i];
4496 } else { 4437 } else {
4497 // vsub.f32 Qd, Qm, Qn. 4438 // vsub.f32 Qd, Qm, Qn.
4498 src1[i] = src1[i] - src2[i]; 4439 src1[i] = src1[i] - src2[i];
4499 } 4440 }
4500 } 4441 }
4501 set_neon_register(Vd, src1); 4442 set_q_register(Vd, src1);
4502 } else { 4443 } else {
4503 UNIMPLEMENTED(); 4444 UNIMPLEMENTED();
4504 } 4445 }
4505 break; 4446 break;
4506 } 4447 }
4507 case 0xe: { 4448 case 0xe: {
4508 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { 4449 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
4509 // vceq.f32. 4450 // vceq.f32.
4510 float src1[4], src2[4]; 4451 float src1[4], src2[4];
4511 get_neon_register(Vn, src1); 4452 get_q_register(Vn, src1);
4512 get_neon_register(Vm, src2); 4453 get_q_register(Vm, src2);
4513 uint32_t dst[4]; 4454 uint32_t dst[4];
4514 for (int i = 0; i < 4; i++) { 4455 for (int i = 0; i < 4; i++) {
4515 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; 4456 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
4516 } 4457 }
4517 set_neon_register(Vd, dst); 4458 set_q_register(Vd, dst);
4518 } else { 4459 } else {
4519 UNIMPLEMENTED(); 4460 UNIMPLEMENTED();
4520 } 4461 }
4521 break; 4462 break;
4522 } 4463 }
4523 case 0xf: { 4464 case 0xf: {
4524 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { 4465 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
4525 float src1[4], src2[4]; 4466 float src1[4], src2[4];
4526 get_neon_register(Vn, src1); 4467 get_q_register(Vn, src1);
4527 get_neon_register(Vm, src2); 4468 get_q_register(Vm, src2);
4528 if (instr->Bit(4) == 1) { 4469 if (instr->Bit(4) == 1) {
4529 if (instr->Bit(21) == 0) { 4470 if (instr->Bit(21) == 0) {
4530 // vrecps.f32 Qd, Qm, Qn. 4471 // vrecps.f32 Qd, Qm, Qn.
4531 for (int i = 0; i < 4; i++) { 4472 for (int i = 0; i < 4; i++) {
4532 src1[i] = 2.0f - src1[i] * src2[i]; 4473 src1[i] = 2.0f - src1[i] * src2[i];
4533 } 4474 }
4534 } else { 4475 } else {
4535 // vrsqrts.f32 Qd, Qm, Qn. 4476 // vrsqrts.f32 Qd, Qm, Qn.
4536 for (int i = 0; i < 4; i++) { 4477 for (int i = 0; i < 4; i++) {
4537 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; 4478 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
4538 } 4479 }
4539 } 4480 }
4540 } else { 4481 } else {
4541 // vmin/vmax.f32 Qd, Qm, Qn. 4482 // vmin/vmax.f32 Qd, Qm, Qn.
4542 bool min = instr->Bit(21) == 1; 4483 bool min = instr->Bit(21) == 1;
4543 for (int i = 0; i < 4; i++) { 4484 for (int i = 0; i < 4; i++) {
4544 src1[i] = MinMax(src1[i], src2[i], min); 4485 src1[i] = MinMax(src1[i], src2[i], min);
4545 } 4486 }
4546 } 4487 }
4547 set_neon_register(Vd, src1); 4488 set_q_register(Vd, src1);
4548 } else { 4489 } else {
4549 UNIMPLEMENTED(); 4490 UNIMPLEMENTED();
4550 } 4491 }
4551 break; 4492 break;
4552 } 4493 }
4553 default: 4494 default:
4554 UNIMPLEMENTED(); 4495 UNIMPLEMENTED();
4555 break; 4496 break;
4556 } 4497 }
4557 break; 4498 break;
(...skipping 20 matching lines...)
4578 UNIMPLEMENTED(); 4519 UNIMPLEMENTED();
4579 break; 4520 break;
4580 } 4521 }
4581 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { 4522 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
4582 // vext. 4523 // vext.
4583 int imm4 = instr->Bits(11, 8); 4524 int imm4 = instr->Bits(11, 8);
4584 int Vd = instr->VFPDRegValue(kSimd128Precision); 4525 int Vd = instr->VFPDRegValue(kSimd128Precision);
4585 int Vm = instr->VFPMRegValue(kSimd128Precision); 4526 int Vm = instr->VFPMRegValue(kSimd128Precision);
4586 int Vn = instr->VFPNRegValue(kSimd128Precision); 4527 int Vn = instr->VFPNRegValue(kSimd128Precision);
4587 uint8_t src1[16], src2[16], dst[16]; 4528 uint8_t src1[16], src2[16], dst[16];
4588 get_neon_register(Vn, src1); 4529 get_q_register(Vn, src1);
4589 get_neon_register(Vm, src2); 4530 get_q_register(Vm, src2);
4590 int boundary = kSimd128Size - imm4; 4531 int boundary = kSimd128Size - imm4;
4591 int i = 0; 4532 int i = 0;
4592 for (; i < boundary; i++) { 4533 for (; i < boundary; i++) {
4593 dst[i] = src1[i + imm4]; 4534 dst[i] = src1[i + imm4];
4594 } 4535 }
4595 for (; i < 16; i++) { 4536 for (; i < 16; i++) {
4596 dst[i] = src2[i - boundary]; 4537 dst[i] = src2[i - boundary];
4597 } 4538 }
4598 set_neon_register(Vd, dst); 4539 set_q_register(Vd, dst);
4599 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { 4540 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) {
4600 // vshl.i<size> Qd, Qm, shift 4541 // vshl.i<size> Qd, Qm, shift
4601 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 4542 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
4602 int shift = instr->Bits(21, 16) - size; 4543 int shift = instr->Bits(21, 16) - size;
4603 int Vd = instr->VFPDRegValue(kSimd128Precision); 4544 int Vd = instr->VFPDRegValue(kSimd128Precision);
4604 int Vm = instr->VFPMRegValue(kSimd128Precision); 4545 int Vm = instr->VFPMRegValue(kSimd128Precision);
4605 NeonSize ns = static_cast<NeonSize>(size / 16); 4546 NeonSize ns = static_cast<NeonSize>(size / 16);
4606 switch (ns) { 4547 switch (ns) {
4607 case Neon8: 4548 case Neon8: {
4608 ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift); 4549 uint8_t src[16];
4550 get_q_register(Vm, src);
4551 for (int i = 0; i < 16; i++) {
4552 src[i] <<= shift;
4553 }
4554 set_q_register(Vd, src);
4609 break; 4555 break;
4610 case Neon16: 4556 }
4611 ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift); 4557 case Neon16: {
4558 uint16_t src[8];
4559 get_q_register(Vm, src);
4560 for (int i = 0; i < 8; i++) {
4561 src[i] <<= shift;
4562 }
4563 set_q_register(Vd, src);
4612 break; 4564 break;
4613 case Neon32: 4565 }
4614 ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift); 4566 case Neon32: {
4567 uint32_t src[4];
4568 get_q_register(Vm, src);
4569 for (int i = 0; i < 4; i++) {
4570 src[i] <<= shift;
4571 }
4572 set_q_register(Vd, src);
4615 break; 4573 break;
4574 }
4616 default: 4575 default:
4617 UNREACHABLE(); 4576 UNREACHABLE();
4618 break; 4577 break;
4619 } 4578 }
4620 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { 4579 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
4621 // vshr.s<size> Qd, Qm, shift 4580 // vshr.s<size> Qd, Qm, shift
4622 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 4581 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
4623 int shift = 2 * size - instr->Bits(21, 16); 4582 int shift = 2 * size - instr->Bits(21, 16);
4624 int Vd = instr->VFPDRegValue(kSimd128Precision); 4583 int Vd = instr->VFPDRegValue(kSimd128Precision);
4625 int Vm = instr->VFPMRegValue(kSimd128Precision); 4584 int Vm = instr->VFPMRegValue(kSimd128Precision);
4626 NeonSize ns = static_cast<NeonSize>(size / 16); 4585 NeonSize ns = static_cast<NeonSize>(size / 16);
4627 switch (ns) { 4586 switch (ns) {
4628 case Neon8: 4587 case Neon8: {
4629 ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift); 4588 int8_t src[16];
4589 get_q_register(Vm, src);
4590 for (int i = 0; i < 16; i++) {
4591 src[i] = ArithmeticShiftRight(src[i], shift);
4592 }
4593 set_q_register(Vd, src);
4630 break; 4594 break;
4631 case Neon16: 4595 }
4632 ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift); 4596 case Neon16: {
4597 int16_t src[8];
4598 get_q_register(Vm, src);
4599 for (int i = 0; i < 8; i++) {
4600 src[i] = ArithmeticShiftRight(src[i], shift);
4601 }
4602 set_q_register(Vd, src);
4633 break; 4603 break;
4634 case Neon32: 4604 }
4635 ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift); 4605 case Neon32: {
4606 int32_t src[4];
4607 get_q_register(Vm, src);
4608 for (int i = 0; i < 4; i++) {
4609 src[i] = ArithmeticShiftRight(src[i], shift);
4610 }
4611 set_q_register(Vd, src);
4636 break; 4612 break;
4613 }
4637 default: 4614 default:
4638 UNREACHABLE(); 4615 UNREACHABLE();
4639 break; 4616 break;
4640 } 4617 }
4641 } else { 4618 } else {
4642 UNIMPLEMENTED(); 4619 UNIMPLEMENTED();
4643 } 4620 }
4644 break; 4621 break;
4645 case 6: { 4622 case 6: {
4646 int Vd, Vm, Vn; 4623 int Vd, Vm, Vn;
(...skipping 27 matching lines...)
4674 } 4651 }
4675 } else { 4652 } else {
4676 UNIMPLEMENTED(); 4653 UNIMPLEMENTED();
4677 } 4654 }
4678 break; 4655 break;
4679 } 4656 }
4680 case 0x1: { 4657 case 0x1: {
4681 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { 4658 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
4682 // vbsl.size Qd, Qm, Qn. 4659 // vbsl.size Qd, Qm, Qn.
4683 uint32_t dst[4], src1[4], src2[4]; 4660 uint32_t dst[4], src1[4], src2[4];
4684 get_neon_register(Vd, dst); 4661 get_q_register(Vd, dst);
4685 get_neon_register(Vn, src1); 4662 get_q_register(Vn, src1);
4686 get_neon_register(Vm, src2); 4663 get_q_register(Vm, src2);
4687 for (int i = 0; i < 4; i++) { 4664 for (int i = 0; i < 4; i++) {
4688 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); 4665 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
4689 } 4666 }
4690 set_neon_register(Vd, dst); 4667 set_q_register(Vd, dst);
4691 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { 4668 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
4692 if (instr->Bit(6) == 0) { 4669 if (instr->Bit(6) == 0) {
4693 // veor Dd, Dn, Dm 4670 // veor Dd, Dn, Dm
4694 uint64_t src1, src2; 4671 uint64_t src1, src2;
4695 get_d_register(Vn, &src1); 4672 get_d_register(Vn, &src1);
4696 get_d_register(Vm, &src2); 4673 get_d_register(Vm, &src2);
4697 src1 ^= src2; 4674 src1 ^= src2;
4698 set_d_register(Vd, &src1); 4675 set_d_register(Vd, &src1);
4699 4676
4700 } else { 4677 } else {
4701 // veor Qd, Qn, Qm 4678 // veor Qd, Qn, Qm
4702 uint32_t src1[4], src2[4]; 4679 uint32_t src1[4], src2[4];
4703 get_neon_register(Vn, src1); 4680 get_q_register(Vn, src1);
4704 get_neon_register(Vm, src2); 4681 get_q_register(Vm, src2);
4705 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; 4682 for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
4706 set_neon_register(Vd, src1); 4683 set_q_register(Vd, src1);
4707 } 4684 }
4708 } else { 4685 } else {
4709 UNIMPLEMENTED(); 4686 UNIMPLEMENTED();
4710 } 4687 }
4711 break; 4688 break;
4712 } 4689 }
4713 case 0x2: { 4690 case 0x2: {
4714 if (instr->Bit(4) == 1) { 4691 if (instr->Bit(4) == 1) {
4715 // vqsub.u<size> Qd, Qm, Qn. 4692 // vqsub.u<size> Qd, Qm, Qn.
4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4693 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
(...skipping 14 matching lines...)
4731 } else { 4708 } else {
4732 UNIMPLEMENTED(); 4709 UNIMPLEMENTED();
4733 } 4710 }
4734 break; 4711 break;
4735 } 4712 }
4736 case 0x3: { 4713 case 0x3: {
4737 // vcge/vcgt.u<size> Qd, Qm, Qn. 4714 // vcge/vcgt.u<size> Qd, Qm, Qn.
4738 bool ge = instr->Bit(4) == 1; 4715 bool ge = instr->Bit(4) == 1;
4739 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4740 switch (size) { 4717 switch (size) {
4741 case Neon8: 4718 case Neon8: {
4742 CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); 4719 uint8_t src1[16], src2[16];
4720 get_q_register(Vn, src1);
4721 get_q_register(Vm, src2);
4722 for (int i = 0; i < 16; i++) {
4723 if (ge)
4724 src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
4725 else
4726 src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
4727 }
4728 set_q_register(Vd, src1);
4743 break; 4729 break;
4744 case Neon16: 4730 }
4745 CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); 4731 case Neon16: {
4732 uint16_t src1[8], src2[8];
4733 get_q_register(Vn, src1);
4734 get_q_register(Vm, src2);
4735 for (int i = 0; i < 8; i++) {
4736 if (ge)
4737 src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
4738 else
4739 src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
4740 }
4741 set_q_register(Vd, src1);
4746 break; 4742 break;
4747 case Neon32: 4743 }
4748 CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); 4744 case Neon32: {
4745 uint32_t src1[4], src2[4];
4746 get_q_register(Vn, src1);
4747 get_q_register(Vm, src2);
4748 for (int i = 0; i < 4; i++) {
4749 if (ge)
4750 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
4751 else
4752 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
4753 }
4754 set_q_register(Vd, src1);
4749 break; 4755 break;
4756 }
4750 default: 4757 default:
4751 UNREACHABLE(); 4758 UNREACHABLE();
4752 break; 4759 break;
4753 } 4760 }
4754 break; 4761 break;
4755 } 4762 }
4756 case 0x6: { 4763 case 0x6: {
4757 // vmin/vmax.u<size> Qd, Qm, Qn. 4764 // vmin/vmax.u<size> Qd, Qm, Qn.
4758 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4765 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4759 bool min = instr->Bit(4) != 0; 4766 bool min = instr->Bit(4) != 0;
4760 switch (size) { 4767 switch (size) {
4761 case Neon8: 4768 case Neon8: {
4762 MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min); 4769 uint8_t src1[16], src2[16];
4770 get_q_register(Vn, src1);
4771 get_q_register(Vm, src2);
4772 for (int i = 0; i < 16; i++) {
4773 src1[i] = MinMax(src1[i], src2[i], min);
4774 }
4775 set_q_register(Vd, src1);
4763 break; 4776 break;
4764 case Neon16: 4777 }
4765 MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min); 4778 case Neon16: {
4779 uint16_t src1[8], src2[8];
4780 get_q_register(Vn, src1);
4781 get_q_register(Vm, src2);
4782 for (int i = 0; i < 8; i++) {
4783 src1[i] = MinMax(src1[i], src2[i], min);
4784 }
4785 set_q_register(Vd, src1);
4766 break; 4786 break;
4767 case Neon32: 4787 }
4768 MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min); 4788 case Neon32: {
4789 uint32_t src1[4], src2[4];
4790 get_q_register(Vn, src1);
4791 get_q_register(Vm, src2);
4792 for (int i = 0; i < 4; i++) {
4793 src1[i] = MinMax(src1[i], src2[i], min);
4794 }
4795 set_q_register(Vd, src1);
4769 break; 4796 break;
4797 }
4770 default: 4798 default:
4771 UNREACHABLE(); 4799 UNREACHABLE();
4772 break; 4800 break;
4773 } 4801 }
4774 break; 4802 break;
4775 } 4803 }
4776 case 0x8: { 4804 case 0x8: {
4777 if (instr->Bit(4) == 0) { 4805 if (instr->Bit(4) == 0) {
4778 // vsub.size Qd, Qm, Qn. 4806 // vsub.size Qd, Qm, Qn.
4779 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4807 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4780 switch (size) { 4808 switch (size) {
4781 case Neon8: 4809 case Neon8: {
4782 Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); 4810 uint8_t src1[16], src2[16];
4811 get_q_register(Vn, src1);
4812 get_q_register(Vm, src2);
4813 for (int i = 0; i < 16; i++) {
4814 src1[i] -= src2[i];
4815 }
4816 set_q_register(Vd, src1);
4783 break; 4817 break;
4784 case Neon16: 4818 }
4785 Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); 4819 case Neon16: {
4820 uint16_t src1[8], src2[8];
4821 get_q_register(Vn, src1);
4822 get_q_register(Vm, src2);
4823 for (int i = 0; i < 8; i++) {
4824 src1[i] -= src2[i];
4825 }
4826 set_q_register(Vd, src1);
4786 break; 4827 break;
4787 case Neon32: 4828 }
4788 Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); 4829 case Neon32: {
4830 uint32_t src1[4], src2[4];
4831 get_q_register(Vn, src1);
4832 get_q_register(Vm, src2);
4833 for (int i = 0; i < 4; i++) {
4834 src1[i] -= src2[i];
4835 }
4836 set_q_register(Vd, src1);
4789 break; 4837 break;
4838 }
4790 default: 4839 default:
4791 UNREACHABLE(); 4840 UNREACHABLE();
4792 break; 4841 break;
4793 } 4842 }
4794 } else { 4843 } else {
4795 // vceq.size Qd, Qm, Qn. 4844 // vceq.size Qd, Qm, Qn.
4796 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4845 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4797 switch (size) { 4846 switch (size) {
4798 case Neon8: 4847 case Neon8: {
4799 CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); 4848 uint8_t src1[16], src2[16];
4849 get_q_register(Vn, src1);
4850 get_q_register(Vm, src2);
4851 for (int i = 0; i < 16; i++) {
4852 src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
4853 }
4854 set_q_register(Vd, src1);
4800 break; 4855 break;
4801 case Neon16: 4856 }
4802 CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); 4857 case Neon16: {
4858 uint16_t src1[8], src2[8];
4859 get_q_register(Vn, src1);
4860 get_q_register(Vm, src2);
4861 for (int i = 0; i < 8; i++) {
4862 src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
4863 }
4864 set_q_register(Vd, src1);
4803 break; 4865 break;
4804 case Neon32: 4866 }
4805 CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); 4867 case Neon32: {
4868 uint32_t src1[4], src2[4];
4869 get_q_register(Vn, src1);
4870 get_q_register(Vm, src2);
4871 for (int i = 0; i < 4; i++) {
4872 src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
4873 }
4874 set_q_register(Vd, src1);
4806 break; 4875 break;
4876 }
4807 default: 4877 default:
4808 UNREACHABLE(); 4878 UNREACHABLE();
4809 break; 4879 break;
4810 } 4880 }
4811 } 4881 }
4812 break; 4882 break;
4813 } 4883 }
4814 case 0xa: { 4884 case 0xa: {
4815 // vpmin/vpmax.u<size> Dd, Dm, Dn. 4885 // vpmin/vpmax.u<size> Dd, Dm, Dn.
4816 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4886 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4817 bool min = instr->Bit(4) != 0; 4887 bool min = instr->Bit(4) != 0;
4818 switch (size) { 4888 switch (size) {
4819 case Neon8: 4889 case Neon8: {
4820 PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min); 4890 uint8_t dst[8], src1[8], src2[8];
4891 get_d_register(Vn, src1);
4892 get_d_register(Vm, src2);
4893 for (int i = 0; i < 4; i++) {
4894 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4895 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4896 }
4897 set_d_register(Vd, dst);
4821 break; 4898 break;
4822 case Neon16: 4899 }
4823 PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min); 4900 case Neon16: {
4901 uint16_t dst[4], src1[4], src2[4];
4902 get_d_register(Vn, src1);
4903 get_d_register(Vm, src2);
4904 for (int i = 0; i < 2; i++) {
4905 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4906 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4907 }
4908 set_d_register(Vd, dst);
4824 break; 4909 break;
4825 case Neon32: 4910 }
4826 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min); 4911 case Neon32: {
4912 uint32_t dst[2], src1[2], src2[2];
4913 get_d_register(Vn, src1);
4914 get_d_register(Vm, src2);
4915 dst[0] = MinMax(src1[0], src1[1], min);
4916 dst[1] = MinMax(src2[0], src2[1], min);
4917 set_d_register(Vd, dst);
4827 break; 4918 break;
4919 }
4828 default: 4920 default:
4829 UNREACHABLE(); 4921 UNREACHABLE();
4830 break; 4922 break;
4831 } 4923 }
4832 break; 4924 break;
4833 } 4925 }
4834 case 0xd: { 4926 case 0xd: {
4835 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4927 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4836 // vmul.f32 Qd, Qn, Qm 4928 // vmul.f32 Qd, Qn, Qm
4837 float src1[4], src2[4]; 4929 float src1[4], src2[4];
4838 get_neon_register(Vn, src1); 4930 get_q_register(Vn, src1);
4839 get_neon_register(Vm, src2); 4931 get_q_register(Vm, src2);
4840 for (int i = 0; i < 4; i++) { 4932 for (int i = 0; i < 4; i++) {
4841 src1[i] = src1[i] * src2[i]; 4933 src1[i] = src1[i] * src2[i];
4842 } 4934 }
4843 set_neon_register(Vd, src1); 4935 set_q_register(Vd, src1);
4844 } else { 4936 } else {
4845 UNIMPLEMENTED(); 4937 UNIMPLEMENTED();
4846 } 4938 }
4847 break; 4939 break;
4848 } 4940 }
4849 case 0xe: { 4941 case 0xe: {
4850 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { 4942 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
4851 // vcge/vcgt.f32 Qd, Qm, Qn 4943 // vcge/vcgt.f32 Qd, Qm, Qn
4852 bool ge = instr->Bit(21) == 0; 4944 bool ge = instr->Bit(21) == 0;
4853 float src1[4], src2[4]; 4945 float src1[4], src2[4];
4854 get_neon_register(Vn, src1); 4946 get_q_register(Vn, src1);
4855 get_neon_register(Vm, src2); 4947 get_q_register(Vm, src2);
4856 uint32_t dst[4]; 4948 uint32_t dst[4];
4857 for (int i = 0; i < 4; i++) { 4949 for (int i = 0; i < 4; i++) {
4858 if (ge) { 4950 if (ge) {
4859 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; 4951 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
4860 } else { 4952 } else {
4861 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; 4953 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
4862 } 4954 }
4863 } 4955 }
4864 set_neon_register(Vd, dst); 4956 set_q_register(Vd, dst);
4865 } else { 4957 } else {
4866 UNIMPLEMENTED(); 4958 UNIMPLEMENTED();
4867 } 4959 }
4868 break; 4960 break;
4869 } 4961 }
4870 default: 4962 default:
4871 UNREACHABLE(); 4963 UNREACHABLE();
4872 break; 4964 break;
4873 } 4965 }
4874 break; 4966 break;
(...skipping 20 matching lines...)
4895 UNIMPLEMENTED(); 4987 UNIMPLEMENTED();
4896 break; 4988 break;
4897 } 4989 }
4898 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { 4990 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
4899 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && 4991 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
4900 instr->Bit(6) == 1) { 4992 instr->Bit(6) == 1) {
4901 // vcvt.<Td>.<Tm> Qd, Qm. 4993 // vcvt.<Td>.<Tm> Qd, Qm.
4902 int Vd = instr->VFPDRegValue(kSimd128Precision); 4994 int Vd = instr->VFPDRegValue(kSimd128Precision);
4903 int Vm = instr->VFPMRegValue(kSimd128Precision); 4995 int Vm = instr->VFPMRegValue(kSimd128Precision);
4904 uint32_t q_data[4]; 4996 uint32_t q_data[4];
4905 get_neon_register(Vm, q_data); 4997 get_q_register(Vm, q_data);
4906 int op = instr->Bits(8, 7); 4998 int op = instr->Bits(8, 7);
4907 for (int i = 0; i < 4; i++) { 4999 for (int i = 0; i < 4; i++) {
4908 switch (op) { 5000 switch (op) {
4909 case 0: 5001 case 0:
4910 // f32 <- s32, round towards nearest. 5002 // f32 <- s32, round towards nearest.
4911 q_data[i] = bit_cast<uint32_t>(std::round( 5003 q_data[i] = bit_cast<uint32_t>(std::round(
4912 static_cast<float>(bit_cast<int32_t>(q_data[i])))); 5004 static_cast<float>(bit_cast<int32_t>(q_data[i]))));
4913 break; 5005 break;
4914 case 1: 5006 case 1:
4915 // f32 <- u32, round towards nearest. 5007 // f32 <- u32, round towards nearest.
4916 q_data[i] = bit_cast<uint32_t>( 5008 q_data[i] = bit_cast<uint32_t>(
4917 std::round(static_cast<float>(q_data[i]))); 5009 std::round(static_cast<float>(q_data[i])));
4918 break; 5010 break;
4919 case 2: 5011 case 2:
4920 // s32 <- f32, round to zero. 5012 // s32 <- f32, round to zero.
4921 q_data[i] = static_cast<uint32_t>( 5013 q_data[i] = static_cast<uint32_t>(
4922 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); 5014 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
4923 break; 5015 break;
4924 case 3: 5016 case 3:
4925 // u32 <- f32, round to zero. 5017 // u32 <- f32, round to zero.
4926 q_data[i] = static_cast<uint32_t>( 5018 q_data[i] = static_cast<uint32_t>(
4927 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); 5019 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
4928 break; 5020 break;
4929 } 5021 }
4930 } 5022 }
4931 set_neon_register(Vd, q_data); 5023 set_q_register(Vd, q_data);
4932 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { 5024 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
4933 if (instr->Bit(6) == 0) { 5025 if (instr->Bit(6) == 0) {
4934 // vswp Dd, Dm. 5026 // vswp Dd, Dm.
4935 uint64_t dval, mval; 5027 uint64_t dval, mval;
4936 int vd = instr->VFPDRegValue(kDoublePrecision); 5028 int vd = instr->VFPDRegValue(kDoublePrecision);
4937 int vm = instr->VFPMRegValue(kDoublePrecision); 5029 int vm = instr->VFPMRegValue(kDoublePrecision);
4938 get_d_register(vd, &dval); 5030 get_d_register(vd, &dval);
4939 get_d_register(vm, &mval); 5031 get_d_register(vm, &mval);
4940 set_d_register(vm, &dval); 5032 set_d_register(vm, &dval);
4941 set_d_register(vd, &mval); 5033 set_d_register(vd, &mval);
4942 } else { 5034 } else {
4943 // vswp Qd, Qm. 5035 // vswp Qd, Qm.
4944 uint32_t dval[4], mval[4]; 5036 uint32_t dval[4], mval[4];
4945 int vd = instr->VFPDRegValue(kSimd128Precision); 5037 int vd = instr->VFPDRegValue(kSimd128Precision);
4946 int vm = instr->VFPMRegValue(kSimd128Precision); 5038 int vm = instr->VFPMRegValue(kSimd128Precision);
4947 get_neon_register(vd, dval); 5039 get_q_register(vd, dval);
4948 get_neon_register(vm, mval); 5040 get_q_register(vm, mval);
4949 set_neon_register(vm, dval); 5041 set_q_register(vm, dval);
4950 set_neon_register(vd, mval); 5042 set_q_register(vd, mval);
4951 } 5043 }
4952 } else if (instr->Bits(11, 7) == 0x18) { 5044 } else if (instr->Bits(11, 7) == 0x18) {
4953 // vdup.32 Qd, Sm. 5045 // vdup.32 Qd, Sm.
4954 int vd = instr->VFPDRegValue(kSimd128Precision); 5046 int vd = instr->VFPDRegValue(kSimd128Precision);
4955 int vm = instr->VFPMRegValue(kDoublePrecision); 5047 int vm = instr->VFPMRegValue(kDoublePrecision);
4956 int index = instr->Bit(19); 5048 int index = instr->Bit(19);
4957 uint32_t s_data = get_s_register(vm * 2 + index); 5049 uint32_t s_data = get_s_register(vm * 2 + index);
4958 uint32_t q_data[4]; 5050 uint32_t q_data[4];
4959 for (int i = 0; i < 4; i++) q_data[i] = s_data; 5051 for (int i = 0; i < 4; i++) q_data[i] = s_data;
4960 set_neon_register(vd, q_data); 5052 set_q_register(vd, q_data);
4961 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { 5053 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
4962 // vmvn Qd, Qm. 5054 // vmvn Qd, Qm.
4963 int vd = instr->VFPDRegValue(kSimd128Precision); 5055 int vd = instr->VFPDRegValue(kSimd128Precision);
4964 int vm = instr->VFPMRegValue(kSimd128Precision); 5056 int vm = instr->VFPMRegValue(kSimd128Precision);
4965 uint32_t q_data[4]; 5057 uint32_t q_data[4];
4966 get_neon_register(vm, q_data); 5058 get_q_register(vm, q_data);
4967 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; 5059 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
4968 set_neon_register(vd, q_data); 5060 set_q_register(vd, q_data);
4969 } else if (instr->Bits(11, 10) == 0x2) { 5061 } else if (instr->Bits(11, 10) == 0x2) {
4970 // vtb[l,x] Dd, <list>, Dm. 5062 // vtb[l,x] Dd, <list>, Dm.
4971 int vd = instr->VFPDRegValue(kDoublePrecision); 5063 int vd = instr->VFPDRegValue(kDoublePrecision);
4972 int vn = instr->VFPNRegValue(kDoublePrecision); 5064 int vn = instr->VFPNRegValue(kDoublePrecision);
4973 int vm = instr->VFPMRegValue(kDoublePrecision); 5065 int vm = instr->VFPMRegValue(kDoublePrecision);
4974 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; 5066 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
4975 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx 5067 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
4976 uint64_t destination = 0, indices = 0, result = 0; 5068 uint64_t destination = 0, indices = 0, result = 0;
4977 get_d_register(vd, &destination); 5069 get_d_register(vd, &destination);
4978 get_d_register(vm, &indices); 5070 get_d_register(vm, &indices);
4979 for (int i = 0; i < kDoubleSize; i++) { 5071 for (int i = 0; i < kDoubleSize; i++) {
4980 int shift = i * kBitsPerByte; 5072 int shift = i * kBitsPerByte;
4981 int index = (indices >> shift) & 0xFF; 5073 int index = (indices >> shift) & 0xFF;
4982 if (index < table_len) { 5074 if (index < table_len) {
4983 uint64_t table; 5075 uint64_t table;
4984 get_d_register(vn + index / kDoubleSize, &table); 5076 get_d_register(vn + index / kDoubleSize, &table);
4985 result |= 5077 result |=
4986 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) 5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
4987 << shift; 5079 << shift;
4988 } else if (vtbx) { 5080 } else if (vtbx) {
4989 result |= destination & (0xFFull << shift); 5081 result |= destination & (0xFFull << shift);
4990 } 5082 }
4991 } 5083 }
4992 set_d_register(vd, &result); 5084 set_d_register(vd, &result);
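Illustrative sketch (not part of this patch): the vtbl/vtbx loop above selects one byte per destination lane from a 1-4 register table, zeroing (vtbl) or preserving (vtbx) lanes whose index is out of range. The same semantics over plain byte arrays, with a hypothetical helper name:

    #include <cstdint>

    // 'table' holds table_len bytes (8, 16, 24 or 32); 'indices' and 'dst'
    // are the 8 byte lanes of Dm and Dd.
    static void TableLookupBytes(const uint8_t* table, int table_len,
                                 const uint8_t indices[8], uint8_t dst[8],
                                 bool vtbx) {
      for (int i = 0; i < 8; i++) {
        if (indices[i] < table_len) {
          dst[i] = table[indices[i]];
        } else if (!vtbx) {
          dst[i] = 0;  // vtbl zeroes out-of-range lanes; vtbx keeps dst[i]
        }
      }
    }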
4993 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) { 5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
5086 instr->Bit(6) == 1) {
4994 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5087 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
4995 if (instr->Bit(6) == 0) { 5088 int Vd = instr->VFPDRegValue(kSimd128Precision);
4996 int Vd = instr->VFPDRegValue(kDoublePrecision); 5089 int Vm = instr->VFPMRegValue(kSimd128Precision);
4997 int Vm = instr->VFPMRegValue(kDoublePrecision); 5090 if (instr->Bit(7) == 1) {
4998 if (instr->Bit(7) == 1) { 5091 // vzip.<size> Qd, Qm.
4999 // vzip.<size> Dd, Dm. 5092 switch (size) {
5000 switch (size) { 5093 case Neon8: {
5001 case Neon8: 5094 uint8_t src1[16], src2[16], dst1[16], dst2[16];
5002 Zip<uint8_t, kDoubleSize>(this, Vd, Vm); 5095 get_q_register(Vd, src1);
5003 break; 5096 get_q_register(Vm, src2);
5004 case Neon16: 5097 for (int i = 0; i < 8; i++) {
5005 Zip<uint16_t, kDoubleSize>(this, Vd, Vm); 5098 dst1[i * 2] = src1[i];
5006 break; 5099 dst1[i * 2 + 1] = src2[i];
5007 case Neon32: 5100 dst2[i * 2] = src1[i + 8];
5008 Zip<uint32_t, kDoubleSize>(this, Vd, Vm); 5101 dst2[i * 2 + 1] = src2[i + 8];
5009 break; 5102 }
5010 default: 5103 set_q_register(Vd, dst1);
5011 UNREACHABLE(); 5104 set_q_register(Vm, dst2);
5012 break; 5105 break;
5013 } 5106 }
5014 } else { 5107 case Neon16: {
5015 // vuzp.<size> Dd, Dm. 5108 uint16_t src1[8], src2[8], dst1[8], dst2[8];
5016 switch (size) { 5109 get_q_register(Vd, src1);
5017 case Neon8: 5110 get_q_register(Vm, src2);
5018 Unzip<uint8_t, kDoubleSize>(this, Vd, Vm); 5111 for (int i = 0; i < 4; i++) {
5019 break; 5112 dst1[i * 2] = src1[i];
5020 case Neon16: 5113 dst1[i * 2 + 1] = src2[i];
5021 Unzip<uint16_t, kDoubleSize>(this, Vd, Vm); 5114 dst2[i * 2] = src1[i + 4];
5022 break; 5115 dst2[i * 2 + 1] = src2[i + 4];
5023 case Neon32: 5116 }
5024 Unzip<uint32_t, kDoubleSize>(this, Vd, Vm); 5117 set_q_register(Vd, dst1);
5025 break; 5118 set_q_register(Vm, dst2);
5026 default: 5119 break;
5027 UNREACHABLE();
5028 break;
5029 } 5120 }
5121 case Neon32: {
5122 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5123 get_q_register(Vd, src1);
5124 get_q_register(Vm, src2);
5125 for (int i = 0; i < 2; i++) {
5126 dst1[i * 2] = src1[i];
5127 dst1[i * 2 + 1] = src2[i];
5128 dst2[i * 2] = src1[i + 2];
5129 dst2[i * 2 + 1] = src2[i + 2];
5130 }
5131 set_q_register(Vd, dst1);
5132 set_q_register(Vm, dst2);
5133 break;
5134 }
5135 default:
5136 UNREACHABLE();
5137 break;
5030 } 5138 }
5031 } else { 5139 } else {
5032 int Vd = instr->VFPDRegValue(kSimd128Precision); 5140 // vuzp.<size> Qd, Qm.
5033 int Vm = instr->VFPMRegValue(kSimd128Precision); 5141 switch (size) {
5034 if (instr->Bit(7) == 1) { 5142 case Neon8: {
5035 // vzip.<size> Qd, Qm. 5143 uint8_t src1[16], src2[16], dst1[16], dst2[16];
5036 switch (size) { 5144 get_q_register(Vd, src1);
5037 case Neon8: 5145 get_q_register(Vm, src2);
5038 Zip<uint8_t, kSimd128Size>(this, Vd, Vm); 5146 for (int i = 0; i < 8; i++) {
5039 break; 5147 dst1[i] = src1[i * 2];
5040 case Neon16: 5148 dst1[i + 8] = src2[i * 2];
5041 Zip<uint16_t, kSimd128Size>(this, Vd, Vm); 5149 dst2[i] = src1[i * 2 + 1];
5042 break; 5150 dst2[i + 8] = src2[i * 2 + 1];
5043 case Neon32: 5151 }
5044 Zip<uint32_t, kSimd128Size>(this, Vd, Vm); 5152 set_q_register(Vd, dst1);
5045 break; 5153 set_q_register(Vm, dst2);
5046 default: 5154 break;
5047 UNREACHABLE();
5048 break;
5049 } 5155 }
5050 } else { 5156 case Neon16: {
5051 // vuzp.<size> Qd, Qm. 5157 uint16_t src1[8], src2[8], dst1[8], dst2[8];
5052 switch (size) { 5158 get_q_register(Vd, src1);
5053 case Neon8: 5159 get_q_register(Vm, src2);
5054 Unzip<uint8_t, kSimd128Size>(this, Vd, Vm); 5160 for (int i = 0; i < 4; i++) {
5055 break; 5161 dst1[i] = src1[i * 2];
5056 case Neon16: 5162 dst1[i + 4] = src2[i * 2];
5057 Unzip<uint16_t, kSimd128Size>(this, Vd, Vm); 5163 dst2[i] = src1[i * 2 + 1];
5058 break; 5164 dst2[i + 4] = src2[i * 2 + 1];
5059 case Neon32: 5165 }
5060 Unzip<uint32_t, kSimd128Size>(this, Vd, Vm); 5166 set_q_register(Vd, dst1);
5061 break; 5167 set_q_register(Vm, dst2);
5062 default: 5168 break;
5063 UNREACHABLE();
5064 break;
5065 } 5169 }
5170 case Neon32: {
5171 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5172 get_q_register(Vd, src1);
5173 get_q_register(Vm, src2);
5174 for (int i = 0; i < 2; i++) {
5175 dst1[i] = src1[i * 2];
5176 dst1[i + 2] = src2[i * 2];
5177 dst2[i] = src1[i * 2 + 1];
5178 dst2[i + 2] = src2[i * 2 + 1];
5179 }
5180 set_q_register(Vd, dst1);
5181 set_q_register(Vm, dst2);
5182 break;
5183 }
5184 default:
5185 UNREACHABLE();
5186 break;
5066 } 5187 }
5067 } 5188 }
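Illustrative sketch (not part of this patch): the left column routed vzip/vuzp through templated Zip/Unzip helpers parameterized on lane type and register width, while the revert returns to explicit per-size loops on Q registers only. A rough, hypothetical idea of such a templated interleave over plain lane arrays (not the simulator's register accessors):

    #include <cstddef>
    #include <cstdint>

    // After the call, 'a' holds the interleaved low halves of both inputs and
    // 'b' the interleaved high halves, matching the vzip loops above.
    template <typename T, size_t N>
    void ZipLanes(T (&a)[N], T (&b)[N]) {
      T lo[N], hi[N];
      for (size_t i = 0; i < N / 2; i++) {
        lo[2 * i] = a[i];
        lo[2 * i + 1] = b[i];
        hi[2 * i] = a[i + N / 2];
        hi[2 * i + 1] = b[i + N / 2];
      }
      for (size_t i = 0; i < N; i++) {
        a[i] = lo[i];
        b[i] = hi[i];
      }
    }

    // Usage: uint8_t d[16], m[16]; ZipLanes(d, m);  // behaves like vzip.8 Qd, Qm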
5068 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { 5189 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
5069 // vrev<op>.size Qd, Qm 5190 // vrev<op>.size Qd, Qm
5070 int Vd = instr->VFPDRegValue(kSimd128Precision); 5191 int Vd = instr->VFPDRegValue(kSimd128Precision);
5071 int Vm = instr->VFPMRegValue(kSimd128Precision); 5192 int Vm = instr->VFPMRegValue(kSimd128Precision);
5072 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5193 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5073 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - 5194 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
5074 instr->Bits(8, 7)); 5195 instr->Bits(8, 7));
5075 switch (op) { 5196 switch (op) {
5076 case Neon16: { 5197 case Neon16: {
5077 DCHECK_EQ(Neon8, size); 5198 DCHECK_EQ(Neon8, size);
5078 uint8_t src[16]; 5199 uint8_t src[16];
5079 get_neon_register(Vm, src); 5200 get_q_register(Vm, src);
5080 for (int i = 0; i < 16; i += 2) { 5201 for (int i = 0; i < 16; i += 2) {
5081 std::swap(src[i], src[i + 1]); 5202 std::swap(src[i], src[i + 1]);
5082 } 5203 }
5083 set_neon_register(Vd, src); 5204 set_q_register(Vd, src);
5084 break; 5205 break;
5085 } 5206 }
5086 case Neon32: { 5207 case Neon32: {
5087 switch (size) { 5208 switch (size) {
5088 case Neon16: { 5209 case Neon16: {
5089 uint16_t src[8]; 5210 uint16_t src[8];
5090 get_neon_register(Vm, src); 5211 get_q_register(Vm, src);
5091 for (int i = 0; i < 8; i += 2) { 5212 for (int i = 0; i < 8; i += 2) {
5092 std::swap(src[i], src[i + 1]); 5213 std::swap(src[i], src[i + 1]);
5093 } 5214 }
5094 set_neon_register(Vd, src); 5215 set_q_register(Vd, src);
5095 break; 5216 break;
5096 } 5217 }
5097 case Neon8: { 5218 case Neon8: {
5098 uint8_t src[16]; 5219 uint8_t src[16];
5099 get_neon_register(Vm, src); 5220 get_q_register(Vm, src);
5100 for (int i = 0; i < 4; i++) { 5221 for (int i = 0; i < 4; i++) {
5101 std::swap(src[i * 4], src[i * 4 + 3]); 5222 std::swap(src[i * 4], src[i * 4 + 3]);
5102 std::swap(src[i * 4 + 1], src[i * 4 + 2]); 5223 std::swap(src[i * 4 + 1], src[i * 4 + 2]);
5103 } 5224 }
5104 set_neon_register(Vd, src); 5225 set_q_register(Vd, src);
5105 break; 5226 break;
5106 } 5227 }
5107 default: 5228 default:
5108 UNREACHABLE(); 5229 UNREACHABLE();
5109 break; 5230 break;
5110 } 5231 }
5111 break; 5232 break;
5112 } 5233 }
5113 case Neon64: { 5234 case Neon64: {
5114 switch (size) { 5235 switch (size) {
5115 case Neon32: { 5236 case Neon32: {
5116 uint32_t src[4]; 5237 uint32_t src[4];
5117 get_neon_register(Vm, src); 5238 get_q_register(Vm, src);
5118 std::swap(src[0], src[1]); 5239 std::swap(src[0], src[1]);
5119 std::swap(src[2], src[3]); 5240 std::swap(src[2], src[3]);
5120 set_neon_register(Vd, src); 5241 set_q_register(Vd, src);
5121 break; 5242 break;
5122 } 5243 }
5123 case Neon16: { 5244 case Neon16: {
5124 uint16_t src[8]; 5245 uint16_t src[8];
5125 get_neon_register(Vm, src); 5246 get_q_register(Vm, src);
5126 for (int i = 0; i < 4; i++) { 5247 for (int i = 0; i < 4; i++) {
5127 std::swap(src[i * 4], src[i * 4 + 3]); 5248 std::swap(src[i * 4], src[i * 4 + 3]);
5128 std::swap(src[i * 4 + 1], src[i * 4 + 2]); 5249 std::swap(src[i * 4 + 1], src[i * 4 + 2]);
5129 } 5250 }
5130 set_neon_register(Vd, src); 5251 set_q_register(Vd, src);
5131 break; 5252 break;
5132 } 5253 }
5133 case Neon8: { 5254 case Neon8: {
5134 uint8_t src[16]; 5255 uint8_t src[16];
5135 get_neon_register(Vm, src); 5256 get_q_register(Vm, src);
5136 for (int i = 0; i < 4; i++) { 5257 for (int i = 0; i < 4; i++) {
5137 std::swap(src[i], src[7 - i]); 5258 std::swap(src[i], src[7 - i]);
5138 std::swap(src[i + 8], src[15 - i]); 5259 std::swap(src[i + 8], src[15 - i]);
5139 } 5260 }
5140 set_neon_register(Vd, src); 5261 set_q_register(Vd, src);
5141 break; 5262 break;
5142 } 5263 }
5143 default: 5264 default:
5144 UNREACHABLE(); 5265 UNREACHABLE();
5145 break; 5266 break;
5146 } 5267 }
5147 break; 5268 break;
5148 } 5269 }
5149 default: 5270 default:
5150 UNREACHABLE(); 5271 UNREACHABLE();
5151 break; 5272 break;
5152 } 5273 }
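Note (not part of this patch): in the Neon64 case with 16-bit lanes, both columns loop i < 4 while indexing src[i * 4 + 3] into an 8-element uint16_t array, which reaches index 15; that looks like a pre-existing out-of-range access rather than something introduced by the revert. A bounds-safe sketch of reversing 16-bit lanes within each 64-bit half (vrev64.16) would be:

    #include <algorithm>
    #include <cstdint>

    // Reverse the order of the four 16-bit lanes inside each 64-bit half of a
    // 128-bit register, staying inside the 8-lane array.
    static void Rev64Lanes16(uint16_t lanes[8]) {
      for (int half = 0; half < 2; half++) {
        uint16_t* p = lanes + half * 4;
        std::swap(p[0], p[3]);
        std::swap(p[1], p[2]);
      }
    }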
5153 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) { 5274 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
5275 int Vd = instr->VFPDRegValue(kSimd128Precision);
5276 int Vm = instr->VFPMRegValue(kSimd128Precision);
5154 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5277 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5155 if (instr->Bit(6) == 0) { 5278 // vtrn.<size> Qd, Qm.
5156 int Vd = instr->VFPDRegValue(kDoublePrecision); 5279 switch (size) {
5157 int Vm = instr->VFPMRegValue(kDoublePrecision); 5280 case Neon8: {
5158 // vtrn.<size> Dd, Dm. 5281 uint8_t src[16], dst[16];
5159 switch (size) { 5282 get_q_register(Vd, dst);
5160 case Neon8: 5283 get_q_register(Vm, src);
5161 Transpose<uint8_t, kDoubleSize>(this, Vd, Vm); 5284 for (int i = 0; i < 8; i++) {
5162 break; 5285 std::swap(dst[2 * i + 1], src[2 * i]);
5163 case Neon16: 5286 }
5164 Transpose<uint16_t, kDoubleSize>(this, Vd, Vm); 5287 set_q_register(Vd, dst);
5165 break; 5288 set_q_register(Vm, src);
5166 case Neon32: 5289 break;
5167 Transpose<uint32_t, kDoubleSize>(this, Vd, Vm);
5168 break;
5169 default:
5170 UNREACHABLE();
5171 break;
5172 } 5290 }
5173 } else { 5291 case Neon16: {
5174 int Vd = instr->VFPDRegValue(kSimd128Precision); 5292 uint16_t src[8], dst[8];
5175 int Vm = instr->VFPMRegValue(kSimd128Precision); 5293 get_q_register(Vd, dst);
5176 // vtrn.<size> Qd, Qm. 5294 get_q_register(Vm, src);
5177 switch (size) { 5295 for (int i = 0; i < 4; i++) {
5178 case Neon8: 5296 std::swap(dst[2 * i + 1], src[2 * i]);
5179 Transpose<uint8_t, kSimd128Size>(this, Vd, Vm); 5297 }
5180 break; 5298 set_q_register(Vd, dst);
5181 case Neon16: 5299 set_q_register(Vm, src);
5182 Transpose<uint16_t, kSimd128Size>(this, Vd, Vm); 5300 break;
5183 break;
5184 case Neon32:
5185 Transpose<uint32_t, kSimd128Size>(this, Vd, Vm);
5186 break;
5187 default:
5188 UNREACHABLE();
5189 break;
5190 } 5301 }
5302 case Neon32: {
5303 uint32_t src[4], dst[4];
5304 get_q_register(Vd, dst);
5305 get_q_register(Vm, src);
5306 for (int i = 0; i < 2; i++) {
5307 std::swap(dst[2 * i + 1], src[2 * i]);
5308 }
5309 set_q_register(Vd, dst);
5310 set_q_register(Vm, src);
5311 break;
5312 }
5313 default:
5314 UNREACHABLE();
5315 break;
5191 } 5316 }
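Illustrative sketch (not part of this patch): vtrn treats the register pair as a set of 2x2 lane matrices and transposes each one, which the loops above express as swapping dst[2 * i + 1] with src[2 * i]. The same step over plain arrays, as a hypothetical helper:

    #include <algorithm>
    #include <cstddef>

    // Swap the odd lanes of 'd' with the even lanes of 'm': the per-pair
    // transpose performed by vtrn for any lane size.
    template <typename T, size_t N>
    void TransposeLanes(T (&d)[N], T (&m)[N]) {
      for (size_t i = 0; i < N / 2; i++) {
        std::swap(d[2 * i + 1], m[2 * i]);
      }
    }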
5192 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { 5317 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
5193 int Vd = instr->VFPDRegValue(kSimd128Precision); 5318 int Vd = instr->VFPDRegValue(kSimd128Precision);
5194 int Vm = instr->VFPMRegValue(kSimd128Precision); 5319 int Vm = instr->VFPMRegValue(kSimd128Precision);
5195 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5320 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5196 if (instr->Bits(9, 6) == 0xd) { 5321 if (instr->Bits(9, 6) == 0xd) {
5197 // vabs<type>.<size> Qd, Qm 5322 // vabs<type>.<size> Qd, Qm
5198 if (instr->Bit(10) != 0) { 5323 if (instr->Bit(10) != 0) {
5199 // floating point (clear sign bits) 5324 // floating point (clear sign bits)
5200 uint32_t src[4]; 5325 uint32_t src[4];
5201 get_neon_register(Vm, src); 5326 get_q_register(Vm, src);
5202 for (int i = 0; i < 4; i++) { 5327 for (int i = 0; i < 4; i++) {
5203 src[i] &= ~0x80000000; 5328 src[i] &= ~0x80000000;
5204 } 5329 }
5205 set_neon_register(Vd, src); 5330 set_q_register(Vd, src);
5206 } else { 5331 } else {
5207 // signed integer 5332 // signed integer
5208 switch (size) { 5333 switch (size) {
5209 case Neon8: 5334 case Neon8: {
5210 Abs<int8_t, kSimd128Size>(this, Vd, Vm); 5335 int8_t src[16];
5336 get_q_register(Vm, src);
5337 for (int i = 0; i < 16; i++) {
5338 src[i] = std::abs(src[i]);
5339 }
5340 set_q_register(Vd, src);
5211 break; 5341 break;
5212 case Neon16: 5342 }
5213 Abs<int16_t, kSimd128Size>(this, Vd, Vm); 5343 case Neon16: {
5344 int16_t src[8];
5345 get_q_register(Vm, src);
5346 for (int i = 0; i < 8; i++) {
5347 src[i] = std::abs(src[i]);
5348 }
5349 set_q_register(Vd, src);
5214 break; 5350 break;
5215 case Neon32: 5351 }
5216 Abs<int32_t, kSimd128Size>(this, Vd, Vm); 5352 case Neon32: {
5353 int32_t src[4];
5354 get_q_register(Vm, src);
5355 for (int i = 0; i < 4; i++) {
5356 src[i] = std::abs(src[i]);
5357 }
5358 set_q_register(Vd, src);
5217 break; 5359 break;
5360 }
5218 default: 5361 default:
5219 UNIMPLEMENTED(); 5362 UNIMPLEMENTED();
5220 break; 5363 break;
5221 } 5364 }
5222 } 5365 }
5223 } else if (instr->Bits(9, 6) == 0xf) { 5366 } else if (instr->Bits(9, 6) == 0xf) {
5224 // vneg<type>.<size> Qd, Qm (signed integer) 5367 // vneg<type>.<size> Qd, Qm (signed integer)
5225 if (instr->Bit(10) != 0) { 5368 if (instr->Bit(10) != 0) {
5226 // floating point (toggle sign bits) 5369 // floating point (toggle sign bits)
5227 uint32_t src[4]; 5370 uint32_t src[4];
5228 get_neon_register(Vm, src); 5371 get_q_register(Vm, src);
5229 for (int i = 0; i < 4; i++) { 5372 for (int i = 0; i < 4; i++) {
5230 src[i] ^= 0x80000000; 5373 src[i] ^= 0x80000000;
5231 } 5374 }
5232 set_neon_register(Vd, src); 5375 set_q_register(Vd, src);
5233 } else { 5376 } else {
5234 // signed integer 5377 // signed integer
5235 switch (size) { 5378 switch (size) {
5236 case Neon8: 5379 case Neon8: {
5237 Neg<int8_t, kSimd128Size>(this, Vd, Vm); 5380 int8_t src[16];
5381 get_q_register(Vm, src);
5382 for (int i = 0; i < 16; i++) {
5383 src[i] = -src[i];
5384 }
5385 set_q_register(Vd, src);
5238 break; 5386 break;
5387 }
5239 case Neon16: 5388 case Neon16:
5240 Neg<int16_t, kSimd128Size>(this, Vd, Vm); 5389 int16_t src[8];
5390 get_q_register(Vm, src);
5391 for (int i = 0; i < 8; i++) {
5392 src[i] = -src[i];
5393 }
5394 set_q_register(Vd, src);
5241 break; 5395 break;
5242 case Neon32: 5396 case Neon32: {
5243 Neg<int32_t, kSimd128Size>(this, Vd, Vm); 5397 int32_t src[4];
5398 get_q_register(Vm, src);
5399 for (int i = 0; i < 4; i++) {
5400 src[i] = -src[i];
5401 }
5402 set_q_register(Vd, src);
5244 break; 5403 break;
5404 }
5245 default: 5405 default:
5246 UNIMPLEMENTED(); 5406 UNIMPLEMENTED();
5247 break; 5407 break;
5248 } 5408 }
5249 } 5409 }
5250 } else { 5410 } else {
5251 UNIMPLEMENTED(); 5411 UNIMPLEMENTED();
5252 } 5412 }
5253 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { 5413 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) {
5254 // vrecpe/vrsqrte.f32 Qd, Qm. 5414 // vrecpe/vrsqrte.f32 Qd, Qm.
5255 int Vd = instr->VFPDRegValue(kSimd128Precision); 5415 int Vd = instr->VFPDRegValue(kSimd128Precision);
5256 int Vm = instr->VFPMRegValue(kSimd128Precision); 5416 int Vm = instr->VFPMRegValue(kSimd128Precision);
5257 uint32_t src[4]; 5417 uint32_t src[4];
5258 get_neon_register(Vm, src); 5418 get_q_register(Vm, src);
5259 if (instr->Bit(7) == 0) { 5419 if (instr->Bit(7) == 0) {
5260 for (int i = 0; i < 4; i++) { 5420 for (int i = 0; i < 4; i++) {
5261 float denom = bit_cast<float>(src[i]); 5421 float denom = bit_cast<float>(src[i]);
5262 div_zero_vfp_flag_ = (denom == 0); 5422 div_zero_vfp_flag_ = (denom == 0);
5263 float result = 1.0f / denom; 5423 float result = 1.0f / denom;
5264 result = canonicalizeNaN(result); 5424 result = canonicalizeNaN(result);
5265 src[i] = bit_cast<uint32_t>(result); 5425 src[i] = bit_cast<uint32_t>(result);
5266 } 5426 }
5267 } else { 5427 } else {
5268 lazily_initialize_fast_sqrt(isolate_); 5428 lazily_initialize_fast_sqrt(isolate_);
5269 for (int i = 0; i < 4; i++) { 5429 for (int i = 0; i < 4; i++) {
5270 float radicand = bit_cast<float>(src[i]); 5430 float radicand = bit_cast<float>(src[i]);
5271 float result = 1.0f / fast_sqrt(radicand, isolate_); 5431 float result = 1.0f / fast_sqrt(radicand, isolate_);
5272 result = canonicalizeNaN(result); 5432 result = canonicalizeNaN(result);
5273 src[i] = bit_cast<uint32_t>(result); 5433 src[i] = bit_cast<uint32_t>(result);
5274 } 5434 }
5275 } 5435 }
5276 set_neon_register(Vd, src); 5436 set_q_register(Vd, src);
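Illustrative sketch (not part of this patch): the simulator models vrecpe/vrsqrte with full-precision 1/x and 1/sqrt(x), whereas the hardware instructions only return low-precision estimates. The lane loop, minus the divide-by-zero flag and NaN canonicalization, reduces to:

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // Lane-wise reciprocal (sqrt_estimate == false) or reciprocal square root
    // (sqrt_estimate == true) over four f32 lanes stored as raw bits.
    static void RecipEstimateLanes(uint32_t lanes[4], bool sqrt_estimate) {
      for (int i = 0; i < 4; i++) {
        float x;
        std::memcpy(&x, &lanes[i], 4);
        float r = sqrt_estimate ? 1.0f / std::sqrt(x) : 1.0f / x;
        std::memcpy(&lanes[i], &r, 4);
      }
    }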
5277 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && 5437 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
5278 instr->Bits(7, 6) != 0) { 5438 instr->Bits(7, 6) != 0) {
5279 // vqmovn.<type><size> Dd, Qm. 5439 // vqmovn.<type><size> Dd, Qm.
5280 int Vd = instr->VFPDRegValue(kDoublePrecision); 5440 int Vd = instr->VFPDRegValue(kDoublePrecision);
5281 int Vm = instr->VFPMRegValue(kSimd128Precision); 5441 int Vm = instr->VFPMRegValue(kSimd128Precision);
5282 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5442 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5283 bool is_unsigned = instr->Bit(6) != 0; 5443 bool is_unsigned = instr->Bit(6) != 0;
5284 switch (size) { 5444 switch (size) {
5285 case Neon8: { 5445 case Neon8: {
5286 if (is_unsigned) { 5446 if (is_unsigned) {
(...skipping 27 matching lines...)
5314 UNIMPLEMENTED(); 5474 UNIMPLEMENTED();
5315 } 5475 }
5316 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { 5476 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
5317 // vshr.u<size> Qd, Qm, shift 5477 // vshr.u<size> Qd, Qm, shift
5318 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 5478 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
5319 int shift = 2 * size - instr->Bits(21, 16); 5479 int shift = 2 * size - instr->Bits(21, 16);
5320 int Vd = instr->VFPDRegValue(kSimd128Precision); 5480 int Vd = instr->VFPDRegValue(kSimd128Precision);
5321 int Vm = instr->VFPMRegValue(kSimd128Precision); 5481 int Vm = instr->VFPMRegValue(kSimd128Precision);
5322 NeonSize ns = static_cast<NeonSize>(size / 16); 5482 NeonSize ns = static_cast<NeonSize>(size / 16);
5323 switch (ns) { 5483 switch (ns) {
5324 case Neon8: 5484 case Neon8: {
5325 ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift); 5485 uint8_t src[16];
5486 get_q_register(Vm, src);
5487 for (int i = 0; i < 16; i++) {
5488 src[i] >>= shift;
5489 }
5490 set_q_register(Vd, src);
5326 break; 5491 break;
5327 case Neon16: 5492 }
5328 ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift); 5493 case Neon16: {
5494 uint16_t src[8];
5495 get_q_register(Vm, src);
5496 for (int i = 0; i < 8; i++) {
5497 src[i] >>= shift;
5498 }
5499 set_q_register(Vd, src);
5329 break; 5500 break;
5330 case Neon32: 5501 }
5331 ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift); 5502 case Neon32: {
5503 uint32_t src[4];
5504 get_q_register(Vm, src);
5505 for (int i = 0; i < 4; i++) {
5506 src[i] >>= shift;
5507 }
5508 set_q_register(Vd, src);
5332 break; 5509 break;
5510 }
5333 default: 5511 default:
5334 UNREACHABLE(); 5512 UNREACHABLE();
5335 break; 5513 break;
5336 } 5514 }
5337 } else { 5515 } else {
5338 UNIMPLEMENTED(); 5516 UNIMPLEMENTED();
5339 } 5517 }
5340 break; 5518 break;
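Illustrative sketch (not part of this patch): the vshr.u immediate decode above recovers both the element size and the shift amount from imm6 (bits 21:16), since a shift of s on an n-bit element is encoded as imm6 = 2*n - s. A standalone check of that decode, with hypothetical helper names and assuming a valid encoding (imm6 >= 8):

    #include <cstdint>

    // Largest power of two <= x (x >= 1).
    static uint32_t RoundDownToPow2(uint32_t x) {
      uint32_t p = 1;
      while (p * 2 <= x) p *= 2;
      return p;
    }

    static void DecodeVshrImm(uint32_t imm6, int* size, int* shift) {
      *size = static_cast<int>(RoundDownToPow2(imm6));  // 8, 16 or 32
      *shift = 2 * *size - static_cast<int>(imm6);      // 1..size
    }

    // Examples: imm6 = 56 -> size 32, shift 8 (vshr.u32 #8)
    //           imm6 = 12 -> size 8,  shift 4 (vshr.u8 #4)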
5341 case 8: 5519 case 8:
5342 if (instr->Bits(21, 20) == 0) { 5520 if (instr->Bits(21, 20) == 0) {
(...skipping 701 matching lines...)
6044 processor->prev_ = nullptr; 6222 processor->prev_ = nullptr;
6045 processor->next_ = nullptr; 6223 processor->next_ = nullptr;
6046 } 6224 }
6047 6225
6048 } // namespace internal 6226 } // namespace internal
6049 } // namespace v8 6227 } // namespace v8
6050 6228
6051 #endif // USE_SIMULATOR 6229 #endif // USE_SIMULATOR
6052 6230
6053 #endif // V8_TARGET_ARCH_ARM 6231 #endif // V8_TARGET_ARCH_ARM