Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2649323012: [ARM] Add Neon saturating add and subtract instructions. (Closed)
Patch Set: Template-ize Add and Sub Saturate operations. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 3970 matching lines...) Expand 10 before | Expand all | Expand 10 after
3981 HandleVList(instr); 3981 HandleVList(instr);
3982 break; 3982 break;
3983 default: 3983 default:
3984 UNIMPLEMENTED(); // Not used by V8. 3984 UNIMPLEMENTED(); // Not used by V8.
3985 } 3985 }
3986 } else { 3986 } else {
3987 UNIMPLEMENTED(); // Not used by V8. 3987 UNIMPLEMENTED(); // Not used by V8.
3988 } 3988 }
3989 } 3989 }
3990 3990
3991 // Templated operations for NEON instructions.
3992 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
// Converts |value| to int64_t so that the sum or difference of two lane
// values can be formed without overflow before it is clamped back to T.
template <typename T>
int64_t Widen(T value) {
  // Guard requested during review: the widened result only has headroom for
  // an add or subtract when T is strictly narrower than int64_t.
  static_assert(sizeof(T) < sizeof(int64_t),
                "T must be narrower than int64_t");
  return static_cast<int64_t>(value);
}
3997
// Saturates |value| to the range [numeric_limits<T>::min(),
// numeric_limits<T>::max()] and returns the result converted to T.
template <typename T>
T Clamp(int64_t value) {
  // Both bounds are held as int64_t below. A type such as uint64_t has a
  // maximum that does not fit, which would make every input clamp to 0.
  static_assert(
      std::numeric_limits<T>::digits <= std::numeric_limits<int64_t>::digits,
      "the range of T must be representable in int64_t");
  const int64_t lower = static_cast<int64_t>(std::numeric_limits<T>::min());
  const int64_t upper = static_cast<int64_t>(std::numeric_limits<T>::max());
  // Cap from above first, then raise to the lower bound.
  const int64_t capped = std::min(upper, value);
  return static_cast<T>(std::max(lower, capped));
}
4005
4006 template <typename T>
4007 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4008 static const int kLanes = 16 / sizeof(T);
4009 T src1[kLanes], src2[kLanes];
4010 simulator->get_q_register(Vn, src1);
4011 simulator->get_q_register(Vm, src2);
4012 for (int i = 0; i < kLanes; i++) {
4013 src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i]));
4014 }
4015 simulator->set_q_register(Vd, src1);
4016 }
4017
4018 template <typename T>
4019 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4020 static const int kLanes = 16 / sizeof(T);
4021 T src1[kLanes], src2[kLanes];
4022 simulator->get_q_register(Vn, src1);
4023 simulator->get_q_register(Vm, src2);
4024 for (int i = 0; i < kLanes; i++) {
4025 src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i]));
4026 }
4027 simulator->set_q_register(Vd, src1);
4028 }
4029
3991 void Simulator::DecodeSpecialCondition(Instruction* instr) { 4030 void Simulator::DecodeSpecialCondition(Instruction* instr) {
3992 switch (instr->SpecialValue()) { 4031 switch (instr->SpecialValue()) {
3993 case 4: 4032 case 4: {
3994 if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 2 && 4033 int Vd, Vm, Vn;
3995 instr->Bit(4) == 1) { 4034 if (instr->Bit(6) == 0) {
3996 // vmov Qd, Qm. 4035 Vd = instr->VFPDRegValue(kDoublePrecision);
3997 // vorr, Qd, Qm, Qn. 4036 Vm = instr->VFPMRegValue(kDoublePrecision);
3998 int Vd = instr->VFPDRegValue(kSimd128Precision); 4037 Vn = instr->VFPNRegValue(kDoublePrecision);
3999 int Vm = instr->VFPMRegValue(kSimd128Precision); 4038 } else {
4000 int Vn = instr->VFPNRegValue(kSimd128Precision); 4039 Vd = instr->VFPDRegValue(kSimd128Precision);
4001 uint32_t src1[4]; 4040 Vm = instr->VFPMRegValue(kSimd128Precision);
4002 get_q_register(Vm, src1); 4041 Vn = instr->VFPNRegValue(kSimd128Precision);
4003 if (Vm != Vn) { 4042 }
4004 uint32_t src2[4]; 4043 switch (instr->Bits(11, 8)) {
4005 get_q_register(Vn, src2); 4044 case 0x0: {
4006 for (int i = 0; i < 4; i++) { 4045 if (instr->Bit(4) == 1) {
4007 src1[i] = src1[i] | src2[i]; 4046 // vqadd.s<size> Qd, Qm, Qn.
4047 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4048 switch (size) {
4049 case Neon8:
4050 AddSaturate<int8_t>(this, Vd, Vm, Vn);
4051 break;
4052 case Neon16:
4053 AddSaturate<int16_t>(this, Vd, Vm, Vn);
4054 break;
4055 case Neon32:
4056 AddSaturate<int32_t>(this, Vd, Vm, Vn);
4057 break;
4058 default:
4059 UNREACHABLE();
4060 break;
4061 }
4062 } else {
4063 UNIMPLEMENTED();
4008 } 4064 }
4065 break;
4009 } 4066 }
4010 set_q_register(Vd, src1); 4067 case 0x1: {
4011 } else if (instr->Bits(11, 8) == 8) { 4068 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
4012 // vadd/vtst 4069 instr->Bit(4) == 1) {
4013 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4070 // vmov Qd, Qm.
4014 int Vd = instr->VFPDRegValue(kSimd128Precision); 4071 // vorr, Qd, Qm, Qn.
4015 int Vm = instr->VFPMRegValue(kSimd128Precision); 4072 uint32_t src1[4];
4016 int Vn = instr->VFPNRegValue(kSimd128Precision); 4073 get_q_register(Vm, src1);
4017 if (instr->Bit(4) == 0) { 4074 if (Vm != Vn) {
4018 // vadd.i<size> Qd, Qm, Qn. 4075 uint32_t src2[4];
4076 get_q_register(Vn, src2);
4077 for (int i = 0; i < 4; i++) {
4078 src1[i] = src1[i] | src2[i];
4079 }
4080 }
4081 set_q_register(Vd, src1);
4082 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
4083 instr->Bit(4) == 1) {
4084 // vand Qd, Qm, Qn.
4085 uint32_t src1[4], src2[4];
4086 get_q_register(Vn, src1);
4087 get_q_register(Vm, src2);
4088 for (int i = 0; i < 4; i++) {
4089 src1[i] = src1[i] & src2[i];
4090 }
4091 set_q_register(Vd, src1);
4092 } else {
4093 UNIMPLEMENTED();
4094 }
4095 break;
4096 }
4097 case 0x2: {
4098 if (instr->Bit(4) == 1) {
4099 // vqsub.s<size> Qd, Qm, Qn.
4100 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4101 switch (size) {
4102 case Neon8:
4103 SubSaturate<int8_t>(this, Vd, Vm, Vn);
4104 break;
4105 case Neon16:
4106 SubSaturate<int16_t>(this, Vd, Vm, Vn);
4107 break;
4108 case Neon32:
4109 SubSaturate<int32_t>(this, Vd, Vm, Vn);
4110 break;
4111 default:
4112 UNREACHABLE();
4113 break;
4114 }
4115 } else {
4116 UNIMPLEMENTED();
4117 }
4118 break;
4119 }
4120 case 0x3: {
4121 // vcge/vcgt.s<size> Qd, Qm, Qn.
4122 bool ge = instr->Bit(4) == 1;
4123 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4019 switch (size) { 4124 switch (size) {
4020 case Neon8: { 4125 case Neon8: {
4021 uint8_t src1[16], src2[16]; 4126 int8_t src1[16], src2[16];
4022 get_q_register(Vn, src1); 4127 get_q_register(Vn, src1);
4023 get_q_register(Vm, src2); 4128 get_q_register(Vm, src2);
4024 for (int i = 0; i < 16; i++) { 4129 for (int i = 0; i < 16; i++) {
4025 src1[i] += src2[i]; 4130 if (ge)
4131 src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
4132 else
4133 src1[i] = src1[i] > src2[i] ? 0xFF : 0;
4026 } 4134 }
4027 set_q_register(Vd, src1); 4135 set_q_register(Vd, src1);
4028 break; 4136 break;
4029 } 4137 }
4030 case Neon16: { 4138 case Neon16: {
4031 uint16_t src1[8], src2[8]; 4139 int16_t src1[8], src2[8];
4032 get_q_register(Vn, src1); 4140 get_q_register(Vn, src1);
4033 get_q_register(Vm, src2); 4141 get_q_register(Vm, src2);
4034 for (int i = 0; i < 8; i++) { 4142 for (int i = 0; i < 8; i++) {
4035 src1[i] += src2[i]; 4143 if (ge)
4144 src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
4145 else
4146 src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
4036 } 4147 }
4037 set_q_register(Vd, src1); 4148 set_q_register(Vd, src1);
4038 break; 4149 break;
4039 } 4150 }
4040 case Neon32: { 4151 case Neon32: {
4041 uint32_t src1[4], src2[4]; 4152 int32_t src1[4], src2[4];
4042 get_q_register(Vn, src1); 4153 get_q_register(Vn, src1);
4043 get_q_register(Vm, src2); 4154 get_q_register(Vm, src2);
4044 for (int i = 0; i < 4; i++) { 4155 for (int i = 0; i < 4; i++) {
4045 src1[i] += src2[i]; 4156 if (ge)
4157 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
4158 else
4159 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
4046 } 4160 }
4047 set_q_register(Vd, src1); 4161 set_q_register(Vd, src1);
4048 break; 4162 break;
4049 } 4163 }
4050 default: 4164 default:
4051 UNREACHABLE(); 4165 UNREACHABLE();
4052 break; 4166 break;
4053 } 4167 }
4054 } else { 4168 break;
4055 // vtst.i<size> Qd, Qm, Qn. 4169 }
4170 case 0x6: {
4171 // vmin/vmax.s<size> Qd, Qm, Qn.
4172 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4173 bool min = instr->Bit(4) != 0;
4056 switch (size) { 4174 switch (size) {
4057 case Neon8: { 4175 case Neon8: {
4058 uint8_t src1[16], src2[16]; 4176 int8_t src1[16], src2[16];
4059 get_q_register(Vn, src1); 4177 get_q_register(Vn, src1);
4060 get_q_register(Vm, src2); 4178 get_q_register(Vm, src2);
4061 for (int i = 0; i < 16; i++) { 4179 for (int i = 0; i < 16; i++) {
4062 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; 4180 if (min)
4181 src1[i] = std::min(src1[i], src2[i]);
4182 else
4183 src1[i] = std::max(src1[i], src2[i]);
4063 } 4184 }
4064 set_q_register(Vd, src1); 4185 set_q_register(Vd, src1);
4065 break; 4186 break;
4066 } 4187 }
4067 case Neon16: { 4188 case Neon16: {
4068 uint16_t src1[8], src2[8]; 4189 int16_t src1[8], src2[8];
4069 get_q_register(Vn, src1); 4190 get_q_register(Vn, src1);
4070 get_q_register(Vm, src2); 4191 get_q_register(Vm, src2);
4071 for (int i = 0; i < 8; i++) { 4192 for (int i = 0; i < 8; i++) {
4072 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; 4193 if (min)
4194 src1[i] = std::min(src1[i], src2[i]);
4195 else
4196 src1[i] = std::max(src1[i], src2[i]);
4073 } 4197 }
4074 set_q_register(Vd, src1); 4198 set_q_register(Vd, src1);
4075 break; 4199 break;
4076 } 4200 }
4077 case Neon32: { 4201 case Neon32: {
4078 uint32_t src1[4], src2[4]; 4202 int32_t src1[4], src2[4];
4079 get_q_register(Vn, src1); 4203 get_q_register(Vn, src1);
4080 get_q_register(Vm, src2); 4204 get_q_register(Vm, src2);
4081 for (int i = 0; i < 4; i++) { 4205 for (int i = 0; i < 4; i++) {
4082 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; 4206 if (min)
4207 src1[i] = std::min(src1[i], src2[i]);
4208 else
4209 src1[i] = std::max(src1[i], src2[i]);
4083 } 4210 }
4084 set_q_register(Vd, src1); 4211 set_q_register(Vd, src1);
4085 break; 4212 break;
4086 } 4213 }
4087 default: 4214 default:
4088 UNREACHABLE(); 4215 UNREACHABLE();
4089 break; 4216 break;
4090 } 4217 }
4091 } 4218 break;
4092 } else if (instr->Bits(11, 8) == 0xd && instr->Bit(20) == 0 && 4219 }
4093 instr->Bit(4) == 0) { 4220 case 0x8: {
4094 int Vd = instr->VFPDRegValue(kSimd128Precision); 4221 // vadd/vtst
4095 int Vm = instr->VFPMRegValue(kSimd128Precision); 4222 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4096 int Vn = instr->VFPNRegValue(kSimd128Precision); 4223 if (instr->Bit(4) == 0) {
4097 float src1[4], src2[4]; 4224 // vadd.i<size> Qd, Qm, Qn.
4098 get_q_register(Vn, src1); 4225 switch (size) {
4099 get_q_register(Vm, src2); 4226 case Neon8: {
4100 for (int i = 0; i < 4; i++) { 4227 uint8_t src1[16], src2[16];
4101 if (instr->Bit(21) == 0) { 4228 get_q_register(Vn, src1);
4102 // vadd.f32 Qd, Qm, Qn. 4229 get_q_register(Vm, src2);
4103 src1[i] = src1[i] + src2[i]; 4230 for (int i = 0; i < 16; i++) {
4104 } else { 4231 src1[i] += src2[i];
4105 // vsub.f32 Qd, Qm, Qn. 4232 }
4106 src1[i] = src1[i] - src2[i]; 4233 set_q_register(Vd, src1);
4107 } 4234 break;
4108 } 4235 }
4109 set_q_register(Vd, src1); 4236 case Neon16: {
4110 } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 && 4237 uint16_t src1[8], src2[8];
4111 instr->Bit(4) == 1) { 4238 get_q_register(Vn, src1);
4112 // vmul.i<size> Qd, Qm, Qn. 4239 get_q_register(Vm, src2);
4113 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4240 for (int i = 0; i < 8; i++) {
4114 int Vd = instr->VFPDRegValue(kSimd128Precision); 4241 src1[i] += src2[i];
4115 int Vm = instr->VFPMRegValue(kSimd128Precision); 4242 }
4116 int Vn = instr->VFPNRegValue(kSimd128Precision); 4243 set_q_register(Vd, src1);
4117 switch (size) { 4244 break;
4118 case Neon8: { 4245 }
4119 uint8_t src1[16], src2[16]; 4246 case Neon32: {
4120 get_q_register(Vn, src1); 4247 uint32_t src1[4], src2[4];
4121 get_q_register(Vm, src2); 4248 get_q_register(Vn, src1);
4122 for (int i = 0; i < 16; i++) { 4249 get_q_register(Vm, src2);
4123 src1[i] *= src2[i]; 4250 for (int i = 0; i < 4; i++) {
4124 } 4251 src1[i] += src2[i];
4125 set_q_register(Vd, src1); 4252 }
4126 break; 4253 set_q_register(Vd, src1);
4127 } 4254 break;
4128 case Neon16: { 4255 }
4129 uint16_t src1[8], src2[8]; 4256 default:
4130 get_q_register(Vn, src1); 4257 UNREACHABLE();
4131 get_q_register(Vm, src2); 4258 break;
4132 for (int i = 0; i < 8; i++) { 4259 }
4133 src1[i] *= src2[i]; 4260 } else {
4134 } 4261 // vtst.i<size> Qd, Qm, Qn.
4135 set_q_register(Vd, src1); 4262 switch (size) {
4136 break; 4263 case Neon8: {
4137 } 4264 uint8_t src1[16], src2[16];
4138 case Neon32: { 4265 get_q_register(Vn, src1);
4139 uint32_t src1[4], src2[4]; 4266 get_q_register(Vm, src2);
4267 for (int i = 0; i < 16; i++) {
4268 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
4269 }
4270 set_q_register(Vd, src1);
4271 break;
4272 }
4273 case Neon16: {
4274 uint16_t src1[8], src2[8];
4275 get_q_register(Vn, src1);
4276 get_q_register(Vm, src2);
4277 for (int i = 0; i < 8; i++) {
4278 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
4279 }
4280 set_q_register(Vd, src1);
4281 break;
4282 }
4283 case Neon32: {
4284 uint32_t src1[4], src2[4];
4285 get_q_register(Vn, src1);
4286 get_q_register(Vm, src2);
4287 for (int i = 0; i < 4; i++) {
4288 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
4289 }
4290 set_q_register(Vd, src1);
4291 break;
4292 }
4293 default:
4294 UNREACHABLE();
4295 break;
4296 }
4297 }
4298 break;
4299 }
4300 case 0x9: {
4301 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4302 // vmul.i<size> Qd, Qm, Qn.
4303 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4304 switch (size) {
4305 case Neon8: {
4306 uint8_t src1[16], src2[16];
4307 get_q_register(Vn, src1);
4308 get_q_register(Vm, src2);
4309 for (int i = 0; i < 16; i++) {
4310 src1[i] *= src2[i];
4311 }
4312 set_q_register(Vd, src1);
4313 break;
4314 }
4315 case Neon16: {
4316 uint16_t src1[8], src2[8];
4317 get_q_register(Vn, src1);
4318 get_q_register(Vm, src2);
4319 for (int i = 0; i < 8; i++) {
4320 src1[i] *= src2[i];
4321 }
4322 set_q_register(Vd, src1);
4323 break;
4324 }
4325 case Neon32: {
4326 uint32_t src1[4], src2[4];
4327 get_q_register(Vn, src1);
4328 get_q_register(Vm, src2);
4329 for (int i = 0; i < 4; i++) {
4330 src1[i] *= src2[i];
4331 }
4332 set_q_register(Vd, src1);
4333 break;
4334 }
4335 default:
4336 UNREACHABLE();
4337 break;
4338 }
4339 } else {
4340 UNIMPLEMENTED();
4341 }
4342 break;
4343 }
4344 case 0xd: {
4345 if (instr->Bit(4) == 0) {
4346 float src1[4], src2[4];
4140 get_q_register(Vn, src1); 4347 get_q_register(Vn, src1);
4141 get_q_register(Vm, src2); 4348 get_q_register(Vm, src2);
4142 for (int i = 0; i < 4; i++) { 4349 for (int i = 0; i < 4; i++) {
4143 src1[i] *= src2[i]; 4350 if (instr->Bit(21) == 0) {
4351 // vadd.f32 Qd, Qm, Qn.
4352 src1[i] = src1[i] + src2[i];
4353 } else {
4354 // vsub.f32 Qd, Qm, Qn.
4355 src1[i] = src1[i] - src2[i];
4356 }
4144 } 4357 }
4145 set_q_register(Vd, src1); 4358 set_q_register(Vd, src1);
4146 break; 4359 } else {
4147 } 4360 UNIMPLEMENTED();
4148 default: 4361 }
4149 UNIMPLEMENTED(); 4362 break;
4150 break; 4363 }
4151 } 4364 case 0xe: {
4152 } else if (instr->Bits(11, 8) == 0xe && instr->Bits(21, 20) == 0 && 4365 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
4153 instr->Bit(4) == 0) { 4366 // vceq.f32.
4154 // vceq.f32. 4367 float src1[4], src2[4];
4155 int Vd = instr->VFPDRegValue(kSimd128Precision);
4156 int Vm = instr->VFPMRegValue(kSimd128Precision);
4157 int Vn = instr->VFPNRegValue(kSimd128Precision);
4158 float src1[4], src2[4];
4159 get_q_register(Vn, src1);
4160 get_q_register(Vm, src2);
4161 uint32_t dst[4];
4162 for (int i = 0; i < 4; i++) {
4163 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
4164 }
4165 set_q_register(Vd, dst);
4166 } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 &&
4167 instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4168 int Vd = instr->VFPDRegValue(kSimd128Precision);
4169 int Vm = instr->VFPMRegValue(kSimd128Precision);
4170 int Vn = instr->VFPNRegValue(kSimd128Precision);
4171 // vand Qd, Qm, Qn.
4172 uint32_t src1[4], src2[4];
4173 get_q_register(Vn, src1);
4174 get_q_register(Vm, src2);
4175 for (int i = 0; i < 4; i++) {
4176 src1[i] = src1[i] & src2[i];
4177 }
4178 set_q_register(Vd, src1);
4179 } else if (instr->Bits(11, 8) == 0x3) {
4180 // vcge/vcgt.s<size> Qd, Qm, Qn.
4181 bool ge = instr->Bit(4) == 1;
4182 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4183 int Vd = instr->VFPDRegValue(kSimd128Precision);
4184 int Vm = instr->VFPMRegValue(kSimd128Precision);
4185 int Vn = instr->VFPNRegValue(kSimd128Precision);
4186 switch (size) {
4187 case Neon8: {
4188 int8_t src1[16], src2[16];
4189 get_q_register(Vn, src1); 4368 get_q_register(Vn, src1);
4190 get_q_register(Vm, src2); 4369 get_q_register(Vm, src2);
4191 for (int i = 0; i < 16; i++) { 4370 uint32_t dst[4];
4192 if (ge) 4371 for (int i = 0; i < 4; i++) {
4193 src1[i] = src1[i] >= src2[i] ? 0xFF : 0; 4372 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
4194 else 4373 }
4195 src1[i] = src1[i] > src2[i] ? 0xFF : 0; 4374 set_q_register(Vd, dst);
4196 } 4375 } else {
4197 set_q_register(Vd, src1); 4376 UNIMPLEMENTED();
4198 break; 4377 }
4199 } 4378 break;
4200 case Neon16: { 4379 }
4201 int16_t src1[8], src2[8]; 4380 case 0xf: {
4381 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
4382 float src1[4], src2[4];
4202 get_q_register(Vn, src1); 4383 get_q_register(Vn, src1);
4203 get_q_register(Vm, src2); 4384 get_q_register(Vm, src2);
4204 for (int i = 0; i < 8; i++) { 4385 if (instr->Bit(4) == 1) {
4205 if (ge) 4386 if (instr->Bit(21) == 0) {
4206 src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; 4387 // vrecps.f32 Qd, Qm, Qn.
4207 else 4388 for (int i = 0; i < 4; i++) {
4208 src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; 4389 src1[i] = 2.0f - src1[i] * src2[i];
4390 }
4391 } else {
4392 // vrsqrts.f32 Qd, Qm, Qn.
4393 for (int i = 0; i < 4; i++) {
4394 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
4395 }
4396 }
4397 } else {
4398 if (instr->Bit(21) == 1) {
4399 // vmin.f32 Qd, Qm, Qn.
4400 for (int i = 0; i < 4; i++) {
4401 src1[i] = std::min(src1[i], src2[i]);
4402 }
4403 } else {
4404 // vmax.f32 Qd, Qm, Qn.
4405 for (int i = 0; i < 4; i++) {
4406 src1[i] = std::max(src1[i], src2[i]);
4407 }
4408 }
4209 } 4409 }
4210 set_q_register(Vd, src1); 4410 set_q_register(Vd, src1);
4211 break; 4411 } else {
4212 } 4412 UNIMPLEMENTED();
4213 case Neon32: { 4413 }
4214 int32_t src1[4], src2[4]; 4414 break;
4215 get_q_register(Vn, src1); 4415 }
4216 get_q_register(Vm, src2); 4416 default:
4217 for (int i = 0; i < 4; i++) { 4417 UNIMPLEMENTED();
4218 if (ge) 4418 break;
4219 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
4220 else
4221 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
4222 }
4223 set_q_register(Vd, src1);
4224 break;
4225 }
4226 default:
4227 UNREACHABLE();
4228 break;
4229 }
4230 } else if (instr->Bits(11, 8) == 0xf && instr->Bit(20) == 0 &&
4231 instr->Bit(6) == 1) {
4232 int Vd = instr->VFPDRegValue(kSimd128Precision);
4233 int Vm = instr->VFPMRegValue(kSimd128Precision);
4234 int Vn = instr->VFPNRegValue(kSimd128Precision);
4235 float src1[4], src2[4];
4236 get_q_register(Vn, src1);
4237 get_q_register(Vm, src2);
4238 if (instr->Bit(4) == 1) {
4239 if (instr->Bit(21) == 0) {
4240 // vrecps.f32 Qd, Qm, Qn.
4241 for (int i = 0; i < 4; i++) {
4242 src1[i] = 2.0f - src1[i] * src2[i];
4243 }
4244 } else {
4245 // vrsqrts.f32 Qd, Qm, Qn.
4246 for (int i = 0; i < 4; i++) {
4247 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
4248 }
4249 }
4250 } else {
4251 if (instr->Bit(21) == 1) {
4252 // vmin.f32 Qd, Qm, Qn.
4253 for (int i = 0; i < 4; i++) {
4254 src1[i] = std::min(src1[i], src2[i]);
4255 }
4256 } else {
4257 // vmax.f32 Qd, Qm, Qn.
4258 for (int i = 0; i < 4; i++) {
4259 src1[i] = std::max(src1[i], src2[i]);
4260 }
4261 }
4262 }
4263 set_q_register(Vd, src1);
4264 } else if (instr->Bits(11, 8) == 0x6) {
4265 // vmin/vmax.s<size> Qd, Qm, Qn.
4266 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4267 int Vd = instr->VFPDRegValue(kSimd128Precision);
4268 int Vm = instr->VFPMRegValue(kSimd128Precision);
4269 int Vn = instr->VFPNRegValue(kSimd128Precision);
4270 bool min = instr->Bit(4) != 0;
4271 switch (size) {
4272 case Neon8: {
4273 int8_t src1[16], src2[16];
4274 get_q_register(Vn, src1);
4275 get_q_register(Vm, src2);
4276 for (int i = 0; i < 16; i++) {
4277 if (min)
4278 src1[i] = std::min(src1[i], src2[i]);
4279 else
4280 src1[i] = std::max(src1[i], src2[i]);
4281 }
4282 set_q_register(Vd, src1);
4283 break;
4284 }
4285 case Neon16: {
4286 int16_t src1[8], src2[8];
4287 get_q_register(Vn, src1);
4288 get_q_register(Vm, src2);
4289 for (int i = 0; i < 8; i++) {
4290 if (min)
4291 src1[i] = std::min(src1[i], src2[i]);
4292 else
4293 src1[i] = std::max(src1[i], src2[i]);
4294 }
4295 set_q_register(Vd, src1);
4296 break;
4297 }
4298 case Neon32: {
4299 int32_t src1[4], src2[4];
4300 get_q_register(Vn, src1);
4301 get_q_register(Vm, src2);
4302 for (int i = 0; i < 4; i++) {
4303 if (min)
4304 src1[i] = std::min(src1[i], src2[i]);
4305 else
4306 src1[i] = std::max(src1[i], src2[i]);
4307 }
4308 set_q_register(Vd, src1);
4309 break;
4310 }
4311 default:
4312 UNREACHABLE();
4313 break;
4314 }
4315 } else {
4316 UNIMPLEMENTED();
4317 } 4419 }
4318 break; 4420 break;
4421 }
4319 case 5: 4422 case 5:
4320 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 4423 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
4321 (instr->Bit(4) == 1)) { 4424 (instr->Bit(4) == 1)) {
4322 // vmovl signed 4425 // vmovl signed
4323 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 4426 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
4324 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 4427 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
4325 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 4428 int Vm = (instr->Bit(5) << 4) | instr->VmValue();
4326 int imm3 = instr->Bits(21, 19); 4429 int imm3 = instr->Bits(21, 19);
4327 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 4430 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();
4328 int esize = 8 * imm3; 4431 int esize = 8 * imm3;
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
4429 break; 4532 break;
4430 } 4533 }
4431 default: 4534 default:
4432 UNREACHABLE(); 4535 UNREACHABLE();
4433 break; 4536 break;
4434 } 4537 }
4435 } else { 4538 } else {
4436 UNIMPLEMENTED(); 4539 UNIMPLEMENTED();
4437 } 4540 }
4438 break; 4541 break;
4439 case 6: 4542 case 6: {
4440 if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { 4543 int Vd, Vm, Vn;
4441 // vsub.size Qd, Qm, Qn. 4544 if (instr->Bit(6) == 0) {
4442 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4545 Vd = instr->VFPDRegValue(kDoublePrecision);
4443 int Vd = instr->VFPDRegValue(kSimd128Precision); 4546 Vm = instr->VFPMRegValue(kDoublePrecision);
4444 int Vm = instr->VFPMRegValue(kSimd128Precision); 4547 Vn = instr->VFPNRegValue(kDoublePrecision);
4445 int Vn = instr->VFPNRegValue(kSimd128Precision); 4548 } else {
4446 switch (size) { 4549 Vd = instr->VFPDRegValue(kSimd128Precision);
4447 case Neon8: { 4550 Vm = instr->VFPMRegValue(kSimd128Precision);
4448 uint8_t src1[16], src2[16]; 4551 Vn = instr->VFPNRegValue(kSimd128Precision);
4449 get_q_register(Vn, src1); 4552 }
4450 get_q_register(Vm, src2); 4553 switch (instr->Bits(11, 8)) {
4451 for (int i = 0; i < 16; i++) { 4554 case 0x0: {
4452 src1[i] -= src2[i]; 4555 if (instr->Bit(4) == 1) {
4453 } 4556 // vqadd.u<size> Qd, Qm, Qn.
4454 set_q_register(Vd, src1); 4557 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4455 break; 4558 switch (size) {
4456 } 4559 case Neon8:
4457 case Neon16: { 4560 AddSaturate<uint8_t>(this, Vd, Vm, Vn);
4458 uint16_t src1[8], src2[8]; 4561 break;
4459 get_q_register(Vn, src1); 4562 case Neon16:
4460 get_q_register(Vm, src2); 4563 AddSaturate<uint16_t>(this, Vd, Vm, Vn);
4461 for (int i = 0; i < 8; i++) { 4564 break;
4462 src1[i] -= src2[i]; 4565 case Neon32:
4463 } 4566 AddSaturate<uint32_t>(this, Vd, Vm, Vn);
4464 set_q_register(Vd, src1); 4567 break;
4465 break; 4568 default:
4466 } 4569 UNREACHABLE();
4467 case Neon32: { 4570 break;
4468 uint32_t src1[4], src2[4]; 4571 }
4572 } else {
4573 UNIMPLEMENTED();
4574 }
4575 break;
4576 }
4577 case 0x1: {
4578 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
4579 // vbsl.size Qd, Qm, Qn.
4580 uint32_t dst[4], src1[4], src2[4];
4581 get_q_register(Vd, dst);
4469 get_q_register(Vn, src1); 4582 get_q_register(Vn, src1);
4470 get_q_register(Vm, src2); 4583 get_q_register(Vm, src2);
4471 for (int i = 0; i < 4; i++) { 4584 for (int i = 0; i < 4; i++) {
4472 src1[i] -= src2[i]; 4585 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
4473 } 4586 }
4474 set_q_register(Vd, src1); 4587 set_q_register(Vd, dst);
4475 break; 4588 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
4476 } 4589 if (instr->Bit(6) == 0) {
4477 default: 4590 // veor Dd, Dn, Dm
4478 UNREACHABLE(); 4591 uint64_t src1, src2;
4479 break; 4592 get_d_register(Vn, &src1);
4480 } 4593 get_d_register(Vm, &src2);
4481 } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) { 4594 src1 ^= src2;
4482 // vceq.size Qd, Qm, Qn. 4595 set_d_register(Vd, &src1);
4483 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4596
4484 int Vd = instr->VFPDRegValue(kSimd128Precision); 4597 } else {
4485 int Vm = instr->VFPMRegValue(kSimd128Precision); 4598 // veor Qd, Qn, Qm
4486 int Vn = instr->VFPNRegValue(kSimd128Precision); 4599 uint32_t src1[4], src2[4];
4487 switch (size) { 4600 get_q_register(Vn, src1);
4488 case Neon8: { 4601 get_q_register(Vm, src2);
4489 uint8_t src1[16], src2[16]; 4602 for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
4490 get_q_register(Vn, src1); 4603 set_q_register(Vd, src1);
4491 get_q_register(Vm, src2); 4604 }
4492 for (int i = 0; i < 16; i++) { 4605 } else {
4493 src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; 4606 UNIMPLEMENTED();
4494 } 4607 }
4495 set_q_register(Vd, src1); 4608 break;
4496 break; 4609 }
4497 } 4610 case 0x2: {
4498 case Neon16: { 4611 if (instr->Bit(4) == 1) {
4499 uint16_t src1[8], src2[8]; 4612 // vqsub.u<size> Qd, Qm, Qn.
4500 get_q_register(Vn, src1); 4613 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4501 get_q_register(Vm, src2); 4614 switch (size) {
4502 for (int i = 0; i < 8; i++) { 4615 case Neon8:
4503 src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; 4616 SubSaturate<uint8_t>(this, Vd, Vm, Vn);
4504 } 4617 break;
4505 set_q_register(Vd, src1); 4618 case Neon16:
4506 break; 4619 SubSaturate<uint16_t>(this, Vd, Vm, Vn);
4507 } 4620 break;
4508 case Neon32: { 4621 case Neon32:
4509 uint32_t src1[4], src2[4]; 4622 SubSaturate<uint32_t>(this, Vd, Vm, Vn);
4623 break;
4624 default:
4625 UNREACHABLE();
4626 break;
4627 }
4628 } else {
4629 UNIMPLEMENTED();
4630 }
4631 break;
4632 }
4633 case 0x3: {
4634 // vcge/vcgt.u<size> Qd, Qm, Qn.
4635 bool ge = instr->Bit(4) == 1;
4636 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4637 switch (size) {
4638 case Neon8: {
4639 uint8_t src1[16], src2[16];
4640 get_q_register(Vn, src1);
4641 get_q_register(Vm, src2);
4642 for (int i = 0; i < 16; i++) {
4643 if (ge)
4644 src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
4645 else
4646 src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
4647 }
4648 set_q_register(Vd, src1);
4649 break;
4650 }
4651 case Neon16: {
4652 uint16_t src1[8], src2[8];
4653 get_q_register(Vn, src1);
4654 get_q_register(Vm, src2);
4655 for (int i = 0; i < 8; i++) {
4656 if (ge)
4657 src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
4658 else
4659 src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
4660 }
4661 set_q_register(Vd, src1);
4662 break;
4663 }
4664 case Neon32: {
4665 uint32_t src1[4], src2[4];
4666 get_q_register(Vn, src1);
4667 get_q_register(Vm, src2);
4668 for (int i = 0; i < 4; i++) {
4669 if (ge)
4670 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
4671 else
4672 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
4673 }
4674 set_q_register(Vd, src1);
4675 break;
4676 }
4677 default:
4678 UNREACHABLE();
4679 break;
4680 }
4681 break;
4682 }
4683 case 0x6: {
4684 // vmin/vmax.u<size> Qd, Qm, Qn.
4685 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4686 bool min = instr->Bit(4) != 0;
4687 switch (size) {
4688 case Neon8: {
4689 uint8_t src1[16], src2[16];
4690 get_q_register(Vn, src1);
4691 get_q_register(Vm, src2);
4692 for (int i = 0; i < 16; i++) {
4693 if (min)
4694 src1[i] = std::min(src1[i], src2[i]);
4695 else
4696 src1[i] = std::max(src1[i], src2[i]);
4697 }
4698 set_q_register(Vd, src1);
4699 break;
4700 }
4701 case Neon16: {
4702 uint16_t src1[8], src2[8];
4703 get_q_register(Vn, src1);
4704 get_q_register(Vm, src2);
4705 for (int i = 0; i < 8; i++) {
4706 if (min)
4707 src1[i] = std::min(src1[i], src2[i]);
4708 else
4709 src1[i] = std::max(src1[i], src2[i]);
4710 }
4711 set_q_register(Vd, src1);
4712 break;
4713 }
4714 case Neon32: {
4715 uint32_t src1[4], src2[4];
4716 get_q_register(Vn, src1);
4717 get_q_register(Vm, src2);
4718 for (int i = 0; i < 4; i++) {
4719 if (min)
4720 src1[i] = std::min(src1[i], src2[i]);
4721 else
4722 src1[i] = std::max(src1[i], src2[i]);
4723 }
4724 set_q_register(Vd, src1);
4725 break;
4726 }
4727 default:
4728 UNREACHABLE();
4729 break;
4730 }
4731 break;
4732 }
4733 case 0x8: {
4734 if (instr->Bit(4) == 0) {
4735 // vsub.size Qd, Qm, Qn.
4736 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4737 switch (size) {
4738 case Neon8: {
4739 uint8_t src1[16], src2[16];
4740 get_q_register(Vn, src1);
4741 get_q_register(Vm, src2);
4742 for (int i = 0; i < 16; i++) {
4743 src1[i] -= src2[i];
4744 }
4745 set_q_register(Vd, src1);
4746 break;
4747 }
4748 case Neon16: {
4749 uint16_t src1[8], src2[8];
4750 get_q_register(Vn, src1);
4751 get_q_register(Vm, src2);
4752 for (int i = 0; i < 8; i++) {
4753 src1[i] -= src2[i];
4754 }
4755 set_q_register(Vd, src1);
4756 break;
4757 }
4758 case Neon32: {
4759 uint32_t src1[4], src2[4];
4760 get_q_register(Vn, src1);
4761 get_q_register(Vm, src2);
4762 for (int i = 0; i < 4; i++) {
4763 src1[i] -= src2[i];
4764 }
4765 set_q_register(Vd, src1);
4766 break;
4767 }
4768 default:
4769 UNREACHABLE();
4770 break;
4771 }
4772 } else {
4773 // vceq.size Qd, Qm, Qn.
4774 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4775 switch (size) {
4776 case Neon8: {
4777 uint8_t src1[16], src2[16];
4778 get_q_register(Vn, src1);
4779 get_q_register(Vm, src2);
4780 for (int i = 0; i < 16; i++) {
4781 src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
4782 }
4783 set_q_register(Vd, src1);
4784 break;
4785 }
4786 case Neon16: {
4787 uint16_t src1[8], src2[8];
4788 get_q_register(Vn, src1);
4789 get_q_register(Vm, src2);
4790 for (int i = 0; i < 8; i++) {
4791 src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
4792 }
4793 set_q_register(Vd, src1);
4794 break;
4795 }
4796 case Neon32: {
4797 uint32_t src1[4], src2[4];
4798 get_q_register(Vn, src1);
4799 get_q_register(Vm, src2);
4800 for (int i = 0; i < 4; i++) {
4801 src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
4802 }
4803 set_q_register(Vd, src1);
4804 break;
4805 }
4806 default:
4807 UNREACHABLE();
4808 break;
4809 }
4810 }
4811 break;
4812 }
4813 case 0xd: {
4814 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4815 // vmul.f32 Qd, Qn, Qm
4816 float src1[4], src2[4];
4510 get_q_register(Vn, src1); 4817 get_q_register(Vn, src1);
4511 get_q_register(Vm, src2); 4818 get_q_register(Vm, src2);
4512 for (int i = 0; i < 4; i++) { 4819 for (int i = 0; i < 4; i++) {
4513 src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; 4820 src1[i] = src1[i] * src2[i];
4514 } 4821 }
4515 set_q_register(Vd, src1); 4822 set_q_register(Vd, src1);
4516 break; 4823 } else {
4517 } 4824 UNIMPLEMENTED();
4518 default: 4825 }
4519 UNREACHABLE(); 4826 break;
4520 break; 4827 }
4521 } 4828 case 0xe: {
4522 } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 1 && 4829 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
4523 instr->Bit(4) == 1) { 4830 // vcge/vcgt.f32 Qd, Qm, Qn
4524 // vbsl.size Qd, Qm, Qn. 4831 bool ge = instr->Bit(21) == 0;
4525 int Vd = instr->VFPDRegValue(kSimd128Precision); 4832 float src1[4], src2[4];
4526 int Vm = instr->VFPMRegValue(kSimd128Precision);
4527 int Vn = instr->VFPNRegValue(kSimd128Precision);
4528 uint32_t dst[4], src1[4], src2[4];
4529 get_q_register(Vd, dst);
4530 get_q_register(Vn, src1);
4531 get_q_register(Vm, src2);
4532 for (int i = 0; i < 4; i++) {
4533 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
4534 }
4535 set_q_register(Vd, dst);
4536 } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 &&
4537 instr->Bit(4) == 1) {
4538 if (instr->Bit(6) == 0) {
4539 // veor Dd, Dn, Dm
4540 int Vd = instr->VFPDRegValue(kDoublePrecision);
4541 int Vn = instr->VFPNRegValue(kDoublePrecision);
4542 int Vm = instr->VFPMRegValue(kDoublePrecision);
4543 uint64_t src1, src2;
4544 get_d_register(Vn, &src1);
4545 get_d_register(Vm, &src2);
4546 src1 ^= src2;
4547 set_d_register(Vd, &src1);
4548
4549 } else {
4550 // veor Qd, Qn, Qm
4551 int Vd = instr->VFPDRegValue(kSimd128Precision);
4552 int Vn = instr->VFPNRegValue(kSimd128Precision);
4553 int Vm = instr->VFPMRegValue(kSimd128Precision);
4554 uint32_t src1[4], src2[4];
4555 get_q_register(Vn, src1);
4556 get_q_register(Vm, src2);
4557 for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
4558 set_q_register(Vd, src1);
4559 }
4560 } else if (instr->Bits(11, 8) == 0xd && instr->Bit(21) == 0 &&
4561 instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4562 // vmul.f32 Qd, Qn, Qm
4563 int Vd = instr->VFPDRegValue(kSimd128Precision);
4564 int Vn = instr->VFPNRegValue(kSimd128Precision);
4565 int Vm = instr->VFPMRegValue(kSimd128Precision);
4566 float src1[4], src2[4];
4567 get_q_register(Vn, src1);
4568 get_q_register(Vm, src2);
4569 for (int i = 0; i < 4; i++) {
4570 src1[i] = src1[i] * src2[i];
4571 }
4572 set_q_register(Vd, src1);
4573 } else if (instr->Bits(11, 8) == 0xe && instr->Bit(20) == 0 &&
4574 instr->Bit(4) == 0) {
4575 // vcge/vcgt.f32 Qd, Qm, Qn
4576 bool ge = instr->Bit(21) == 0;
4577 int Vd = instr->VFPDRegValue(kSimd128Precision);
4578 int Vm = instr->VFPMRegValue(kSimd128Precision);
4579 int Vn = instr->VFPNRegValue(kSimd128Precision);
4580 float src1[4], src2[4];
4581 get_q_register(Vn, src1);
4582 get_q_register(Vm, src2);
4583 uint32_t dst[4];
4584 for (int i = 0; i < 4; i++) {
4585 if (ge) {
4586 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
4587 } else {
4588 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
4589 }
4590 }
4591 set_q_register(Vd, dst);
4592 } else if (instr->Bits(11, 8) == 0x3) {
4593 // vcge/vcgt.u<size> Qd, Qm, Qn.
4594 bool ge = instr->Bit(4) == 1;
4595 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4596 int Vd = instr->VFPDRegValue(kSimd128Precision);
4597 int Vm = instr->VFPMRegValue(kSimd128Precision);
4598 int Vn = instr->VFPNRegValue(kSimd128Precision);
4599 switch (size) {
4600 case Neon8: {
4601 uint8_t src1[16], src2[16];
4602 get_q_register(Vn, src1); 4833 get_q_register(Vn, src1);
4603 get_q_register(Vm, src2); 4834 get_q_register(Vm, src2);
4604 for (int i = 0; i < 16; i++) { 4835 uint32_t dst[4];
4605 if (ge)
4606 src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
4607 else
4608 src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
4609 }
4610 set_q_register(Vd, src1);
4611 break;
4612 }
4613 case Neon16: {
4614 uint16_t src1[8], src2[8];
4615 get_q_register(Vn, src1);
4616 get_q_register(Vm, src2);
4617 for (int i = 0; i < 8; i++) {
4618 if (ge)
4619 src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
4620 else
4621 src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
4622 }
4623 set_q_register(Vd, src1);
4624 break;
4625 }
4626 case Neon32: {
4627 uint32_t src1[4], src2[4];
4628 get_q_register(Vn, src1);
4629 get_q_register(Vm, src2);
4630 for (int i = 0; i < 4; i++) { 4836 for (int i = 0; i < 4; i++) {
4631 if (ge) 4837 if (ge) {
4632 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; 4838 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
4633 else 4839 } else {
4634 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; 4840 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
4635 } 4841 }
4636 set_q_register(Vd, src1); 4842 }
4637 break; 4843 set_q_register(Vd, dst);
4638 } 4844 } else {
4639 default: 4845 UNIMPLEMENTED();
4640 UNREACHABLE(); 4846 }
4641 break; 4847 break;
4642 } 4848 }
4643 } else if (instr->Bits(11, 8) == 0x6) { 4849 default:
4644 // vmin/vmax.u<size> Qd, Qm, Qn. 4850 UNREACHABLE();
4645 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4851 break;
4646 int Vd = instr->VFPDRegValue(kSimd128Precision);
4647 int Vm = instr->VFPMRegValue(kSimd128Precision);
4648 int Vn = instr->VFPNRegValue(kSimd128Precision);
4649 bool min = instr->Bit(4) != 0;
4650 switch (size) {
4651 case Neon8: {
4652 uint8_t src1[16], src2[16];
4653 get_q_register(Vn, src1);
4654 get_q_register(Vm, src2);
4655 for (int i = 0; i < 16; i++) {
4656 if (min)
4657 src1[i] = std::min(src1[i], src2[i]);
4658 else
4659 src1[i] = std::max(src1[i], src2[i]);
4660 }
4661 set_q_register(Vd, src1);
4662 break;
4663 }
4664 case Neon16: {
4665 uint16_t src1[8], src2[8];
4666 get_q_register(Vn, src1);
4667 get_q_register(Vm, src2);
4668 for (int i = 0; i < 8; i++) {
4669 if (min)
4670 src1[i] = std::min(src1[i], src2[i]);
4671 else
4672 src1[i] = std::max(src1[i], src2[i]);
4673 }
4674 set_q_register(Vd, src1);
4675 break;
4676 }
4677 case Neon32: {
4678 uint32_t src1[4], src2[4];
4679 get_q_register(Vn, src1);
4680 get_q_register(Vm, src2);
4681 for (int i = 0; i < 4; i++) {
4682 if (min)
4683 src1[i] = std::min(src1[i], src2[i]);
4684 else
4685 src1[i] = std::max(src1[i], src2[i]);
4686 }
4687 set_q_register(Vd, src1);
4688 break;
4689 }
4690 default:
4691 UNREACHABLE();
4692 break;
4693 }
4694 } else {
4695 UNIMPLEMENTED();
4696 } 4852 }
4697 break; 4853 break;
4854 }
4698 case 7: 4855 case 7:
4699 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 4856 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
4700 (instr->Bit(4) == 1)) { 4857 (instr->Bit(4) == 1)) {
4701 // vmovl unsigned 4858 // vmovl unsigned
4702 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 4859 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
4703 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 4860 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
4704 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 4861 int Vm = (instr->Bit(5) << 4) | instr->VmValue();
4705 int imm3 = instr->Bits(21, 19); 4862 int imm3 = instr->Bits(21, 19);
4706 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 4863 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();
4707 int esize = 8 * imm3; 4864 int esize = 8 * imm3;
(...skipping 1115 matching lines...) Expand 10 before | Expand all | Expand 10 after
5823 processor->prev_ = nullptr; 5980 processor->prev_ = nullptr;
5824 processor->next_ = nullptr; 5981 processor->next_ = nullptr;
5825 } 5982 }
5826 5983
5827 } // namespace internal 5984 } // namespace internal
5828 } // namespace v8 5985 } // namespace v8
5829 5986
5830 #endif // USE_SIMULATOR 5987 #endif // USE_SIMULATOR
5831 5988
5832 #endif // V8_TARGET_ARCH_ARM 5989 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698