Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(381)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2711863002: Implement remaining Boolean SIMD operations on ARM. (Closed)
Patch Set: Martyn's review changes. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 882 matching lines...) Expand 10 before | Expand all | Expand 10 after
893 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); 893 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2);
894 } 894 }
895 895
896 896
897 void Simulator::set_d_register(int dreg, const uint32_t* value) { 897 void Simulator::set_d_register(int dreg, const uint32_t* value) {
898 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); 898 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
899 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); 899 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
900 } 900 }
901 901
902 template <typename T> 902 template <typename T>
903 void Simulator::get_d_register(int dreg, T* value) {
904 DCHECK((dreg >= 0) && (dreg < num_d_registers));
905 memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize);
906 }
907
908 template <typename T>
909 void Simulator::set_d_register(int dreg, const T* value) {
910 DCHECK((dreg >= 0) && (dreg < num_d_registers));
911 memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize);
912 }
913
914 template <typename T>
903 void Simulator::get_q_register(int qreg, T* value) { 915 void Simulator::get_q_register(int qreg, T* value) {
904 DCHECK((qreg >= 0) && (qreg < num_q_registers)); 916 DCHECK((qreg >= 0) && (qreg < num_q_registers));
905 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size); 917 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size);
906 } 918 }
907 919
908 template <typename T> 920 template <typename T>
909 void Simulator::set_q_register(int qreg, const T* value) { 921 void Simulator::set_q_register(int qreg, const T* value) {
910 DCHECK((qreg >= 0) && (qreg < num_q_registers)); 922 DCHECK((qreg >= 0) && (qreg < num_q_registers));
911 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size); 923 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
912 } 924 }
913 925
914
915 // Raw access to the PC register. 926 // Raw access to the PC register.
916 void Simulator::set_pc(int32_t value) { 927 void Simulator::set_pc(int32_t value) {
917 pc_modified_ = true; 928 pc_modified_ = true;
918 registers_[pc] = value; 929 registers_[pc] = value;
919 } 930 }
920 931
921 932
922 bool Simulator::has_bad_pc() const { 933 bool Simulator::has_bad_pc() const {
923 return ((registers_[pc] == bad_lr) || (registers_[pc] == end_sim_pc)); 934 return ((registers_[pc] == bad_lr) || (registers_[pc] == end_sim_pc));
924 } 935 }
(...skipping 3074 matching lines...) Expand 10 before | Expand all | Expand 10 after
3999 template <typename T> 4010 template <typename T>
4000 T Clamp(int64_t value) { 4011 T Clamp(int64_t value) {
4001 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); 4012 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
4002 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); 4013 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min());
4003 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); 4014 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max());
4004 int64_t clamped = std::max(min, std::min(max, value)); 4015 int64_t clamped = std::max(min, std::min(max, value));
4005 return static_cast<T>(clamped); 4016 return static_cast<T>(clamped);
4006 } 4017 }
4007 4018
4008 template <typename T> 4019 template <typename T>
4020 T MinMax(T a, T b, bool is_min) {
4021 return is_min ? std::min(a, b) : std::max(a, b);
4022 }
4023
4024 template <typename T>
4009 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4025 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4010 static const int kLanes = 16 / sizeof(T); 4026 static const int kLanes = 16 / sizeof(T);
4011 T src1[kLanes], src2[kLanes]; 4027 T src1[kLanes], src2[kLanes];
4012 simulator->get_q_register(Vn, src1); 4028 simulator->get_q_register(Vn, src1);
4013 simulator->get_q_register(Vm, src2); 4029 simulator->get_q_register(Vm, src2);
4014 for (int i = 0; i < kLanes; i++) { 4030 for (int i = 0; i < kLanes; i++) {
4015 src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i])); 4031 src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i]));
4016 } 4032 }
4017 simulator->set_q_register(Vd, src1); 4033 simulator->set_q_register(Vd, src1);
4018 } 4034 }
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after
4172 case 0x6: { 4188 case 0x6: {
4173 // vmin/vmax.s<size> Qd, Qm, Qn. 4189 // vmin/vmax.s<size> Qd, Qm, Qn.
4174 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4190 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4175 bool min = instr->Bit(4) != 0; 4191 bool min = instr->Bit(4) != 0;
4176 switch (size) { 4192 switch (size) {
4177 case Neon8: { 4193 case Neon8: {
4178 int8_t src1[16], src2[16]; 4194 int8_t src1[16], src2[16];
4179 get_q_register(Vn, src1); 4195 get_q_register(Vn, src1);
4180 get_q_register(Vm, src2); 4196 get_q_register(Vm, src2);
4181 for (int i = 0; i < 16; i++) { 4197 for (int i = 0; i < 16; i++) {
4182 if (min) 4198 src1[i] = MinMax(src1[i], src2[i], min);
4183 src1[i] = std::min(src1[i], src2[i]);
4184 else
4185 src1[i] = std::max(src1[i], src2[i]);
4186 } 4199 }
4187 set_q_register(Vd, src1); 4200 set_q_register(Vd, src1);
4188 break; 4201 break;
4189 } 4202 }
4190 case Neon16: { 4203 case Neon16: {
4191 int16_t src1[8], src2[8]; 4204 int16_t src1[8], src2[8];
4192 get_q_register(Vn, src1); 4205 get_q_register(Vn, src1);
4193 get_q_register(Vm, src2); 4206 get_q_register(Vm, src2);
4194 for (int i = 0; i < 8; i++) { 4207 for (int i = 0; i < 8; i++) {
4195 if (min) 4208 src1[i] = MinMax(src1[i], src2[i], min);
4196 src1[i] = std::min(src1[i], src2[i]);
4197 else
4198 src1[i] = std::max(src1[i], src2[i]);
4199 } 4209 }
4200 set_q_register(Vd, src1); 4210 set_q_register(Vd, src1);
4201 break; 4211 break;
4202 } 4212 }
4203 case Neon32: { 4213 case Neon32: {
4204 int32_t src1[4], src2[4]; 4214 int32_t src1[4], src2[4];
4205 get_q_register(Vn, src1); 4215 get_q_register(Vn, src1);
4206 get_q_register(Vm, src2); 4216 get_q_register(Vm, src2);
4207 for (int i = 0; i < 4; i++) { 4217 for (int i = 0; i < 4; i++) {
4208 if (min) 4218 src1[i] = MinMax(src1[i], src2[i], min);
4209 src1[i] = std::min(src1[i], src2[i]);
4210 else
4211 src1[i] = std::max(src1[i], src2[i]);
4212 } 4219 }
4213 set_q_register(Vd, src1); 4220 set_q_register(Vd, src1);
4214 break; 4221 break;
4215 } 4222 }
4216 default: 4223 default:
4217 UNREACHABLE(); 4224 UNREACHABLE();
4218 break; 4225 break;
4219 } 4226 }
4220 break; 4227 break;
4221 } 4228 }
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
4336 } 4343 }
4337 default: 4344 default:
4338 UNREACHABLE(); 4345 UNREACHABLE();
4339 break; 4346 break;
4340 } 4347 }
4341 } else { 4348 } else {
4342 UNIMPLEMENTED(); 4349 UNIMPLEMENTED();
4343 } 4350 }
4344 break; 4351 break;
4345 } 4352 }
4353 case 0xa: {
4354 // vpmin/vpmax.s<size> Dd, Dm, Dn.
4355 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4356 bool min = instr->Bit(4) != 0;
4357 switch (size) {
4358 case Neon8: {
4359 int8_t dst[8], src1[8], src2[8];
4360 get_d_register(Vn, src1);
4361 get_d_register(Vm, src2);
4362 for (int i = 0; i < 4; i++) {
4363 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4364 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4365 }
4366 set_d_register(Vd, dst);
4367 break;
4368 }
4369 case Neon16: {
4370 int16_t dst[4], src1[4], src2[4];
4371 get_d_register(Vn, src1);
4372 get_d_register(Vm, src2);
4373 for (int i = 0; i < 2; i++) {
4374 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4375 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4376 }
4377 set_d_register(Vd, dst);
4378 break;
4379 }
4380 case Neon32: {
4381 int32_t dst[2], src1[2], src2[2];
4382 get_d_register(Vn, src1);
4383 get_d_register(Vm, src2);
4384 dst[0] = MinMax(src1[0], src1[1], min);
4385 dst[1] = MinMax(src2[0], src2[1], min);
4386 set_d_register(Vd, dst);
4387 break;
4388 }
4389 default:
4390 UNREACHABLE();
4391 break;
4392 }
4393 break;
4394 }
4346 case 0xd: { 4395 case 0xd: {
4347 if (instr->Bit(4) == 0) { 4396 if (instr->Bit(4) == 0) {
4348 float src1[4], src2[4]; 4397 float src1[4], src2[4];
4349 get_q_register(Vn, src1); 4398 get_q_register(Vn, src1);
4350 get_q_register(Vm, src2); 4399 get_q_register(Vm, src2);
4351 for (int i = 0; i < 4; i++) { 4400 for (int i = 0; i < 4; i++) {
4352 if (instr->Bit(21) == 0) { 4401 if (instr->Bit(21) == 0) {
4353 // vadd.f32 Qd, Qm, Qn. 4402 // vadd.f32 Qd, Qm, Qn.
4354 src1[i] = src1[i] + src2[i]; 4403 src1[i] = src1[i] + src2[i];
4355 } else { 4404 } else {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
4390 for (int i = 0; i < 4; i++) { 4439 for (int i = 0; i < 4; i++) {
4391 src1[i] = 2.0f - src1[i] * src2[i]; 4440 src1[i] = 2.0f - src1[i] * src2[i];
4392 } 4441 }
4393 } else { 4442 } else {
4394 // vrsqrts.f32 Qd, Qm, Qn. 4443 // vrsqrts.f32 Qd, Qm, Qn.
4395 for (int i = 0; i < 4; i++) { 4444 for (int i = 0; i < 4; i++) {
4396 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; 4445 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
4397 } 4446 }
4398 } 4447 }
4399 } else { 4448 } else {
4400 if (instr->Bit(21) == 1) { 4449 // vmin/vmax.f32 Qd, Qm, Qn.
4401 // vmin.f32 Qd, Qm, Qn. 4450 bool min = instr->Bit(21) == 1;
4402 for (int i = 0; i < 4; i++) { 4451 for (int i = 0; i < 4; i++) {
4403 src1[i] = std::min(src1[i], src2[i]); 4452 src1[i] = MinMax(src1[i], src2[i], min);
4404 }
4405 } else {
4406 // vmax.f32 Qd, Qm, Qn.
4407 for (int i = 0; i < 4; i++) {
4408 src1[i] = std::max(src1[i], src2[i]);
4409 }
4410 } 4453 }
4411 } 4454 }
4412 set_q_register(Vd, src1); 4455 set_q_register(Vd, src1);
4413 } else { 4456 } else {
4414 UNIMPLEMENTED(); 4457 UNIMPLEMENTED();
4415 } 4458 }
4416 break; 4459 break;
4417 } 4460 }
4418 default: 4461 default:
4419 UNIMPLEMENTED(); 4462 UNIMPLEMENTED();
(...skipping 265 matching lines...) Expand 10 before | Expand all | Expand 10 after
4685 case 0x6: { 4728 case 0x6: {
4686 // vmin/vmax.u<size> Qd, Qm, Qn. 4729 // vmin/vmax.u<size> Qd, Qm, Qn.
4687 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4730 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4688 bool min = instr->Bit(4) != 0; 4731 bool min = instr->Bit(4) != 0;
4689 switch (size) { 4732 switch (size) {
4690 case Neon8: { 4733 case Neon8: {
4691 uint8_t src1[16], src2[16]; 4734 uint8_t src1[16], src2[16];
4692 get_q_register(Vn, src1); 4735 get_q_register(Vn, src1);
4693 get_q_register(Vm, src2); 4736 get_q_register(Vm, src2);
4694 for (int i = 0; i < 16; i++) { 4737 for (int i = 0; i < 16; i++) {
4695 if (min) 4738 src1[i] = MinMax(src1[i], src2[i], min);
4696 src1[i] = std::min(src1[i], src2[i]);
4697 else
4698 src1[i] = std::max(src1[i], src2[i]);
4699 } 4739 }
4700 set_q_register(Vd, src1); 4740 set_q_register(Vd, src1);
4701 break; 4741 break;
4702 } 4742 }
4703 case Neon16: { 4743 case Neon16: {
4704 uint16_t src1[8], src2[8]; 4744 uint16_t src1[8], src2[8];
4705 get_q_register(Vn, src1); 4745 get_q_register(Vn, src1);
4706 get_q_register(Vm, src2); 4746 get_q_register(Vm, src2);
4707 for (int i = 0; i < 8; i++) { 4747 for (int i = 0; i < 8; i++) {
4708 if (min) 4748 src1[i] = MinMax(src1[i], src2[i], min);
4709 src1[i] = std::min(src1[i], src2[i]);
4710 else
4711 src1[i] = std::max(src1[i], src2[i]);
4712 } 4749 }
4713 set_q_register(Vd, src1); 4750 set_q_register(Vd, src1);
4714 break; 4751 break;
4715 } 4752 }
4716 case Neon32: { 4753 case Neon32: {
4717 uint32_t src1[4], src2[4]; 4754 uint32_t src1[4], src2[4];
4718 get_q_register(Vn, src1); 4755 get_q_register(Vn, src1);
4719 get_q_register(Vm, src2); 4756 get_q_register(Vm, src2);
4720 for (int i = 0; i < 4; i++) { 4757 for (int i = 0; i < 4; i++) {
4721 if (min) 4758 src1[i] = MinMax(src1[i], src2[i], min);
4722 src1[i] = std::min(src1[i], src2[i]);
4723 else
4724 src1[i] = std::max(src1[i], src2[i]);
4725 } 4759 }
4726 set_q_register(Vd, src1); 4760 set_q_register(Vd, src1);
4727 break; 4761 break;
4728 } 4762 }
4729 default: 4763 default:
4730 UNREACHABLE(); 4764 UNREACHABLE();
4731 break; 4765 break;
4732 } 4766 }
4733 break; 4767 break;
4734 } 4768 }
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
4805 set_q_register(Vd, src1); 4839 set_q_register(Vd, src1);
4806 break; 4840 break;
4807 } 4841 }
4808 default: 4842 default:
4809 UNREACHABLE(); 4843 UNREACHABLE();
4810 break; 4844 break;
4811 } 4845 }
4812 } 4846 }
4813 break; 4847 break;
4814 } 4848 }
4849 case 0xa: {
4850 // vpmin/vpmax.u<size> Dd, Dm, Dn.
4851 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4852 bool min = instr->Bit(4) != 0;
4853 switch (size) {
4854 case Neon8: {
4855 uint8_t dst[8], src1[8], src2[8];
4856 get_d_register(Vn, src1);
4857 get_d_register(Vm, src2);
4858 for (int i = 0; i < 4; i++) {
4859 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4860 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4861 }
4862 set_d_register(Vd, dst);
4863 break;
4864 }
4865 case Neon16: {
4866 uint16_t dst[4], src1[4], src2[4];
4867 get_d_register(Vn, src1);
4868 get_d_register(Vm, src2);
4869 for (int i = 0; i < 2; i++) {
4870 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4871 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4872 }
4873 set_d_register(Vd, dst);
4874 break;
4875 }
4876 case Neon32: {
4877 uint32_t dst[2], src1[2], src2[2];
4878 get_d_register(Vn, src1);
4879 get_d_register(Vm, src2);
4880 dst[0] = MinMax(src1[0], src1[1], min);
4881 dst[1] = MinMax(src2[0], src2[1], min);
4882 set_d_register(Vd, dst);
4883 break;
4884 }
4885 default:
4886 UNREACHABLE();
4887 break;
4888 }
4889 break;
4890 }
4815 case 0xd: { 4891 case 0xd: {
4816 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4892 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4817 // vmul.f32 Qd, Qn, Qm 4893 // vmul.f32 Qd, Qn, Qm
4818 float src1[4], src2[4]; 4894 float src1[4], src2[4];
4819 get_q_register(Vn, src1); 4895 get_q_register(Vn, src1);
4820 get_q_register(Vm, src2); 4896 get_q_register(Vm, src2);
4821 for (int i = 0; i < 4; i++) { 4897 for (int i = 0; i < 4; i++) {
4822 src1[i] = src1[i] * src2[i]; 4898 src1[i] = src1[i] * src2[i];
4823 } 4899 }
4824 set_q_register(Vd, src1); 4900 set_q_register(Vd, src1);
(...skipping 1157 matching lines...) Expand 10 before | Expand all | Expand 10 after
5982 processor->prev_ = nullptr; 6058 processor->prev_ = nullptr;
5983 processor->next_ = nullptr; 6059 processor->next_ = nullptr;
5984 } 6060 }
5985 6061
5986 } // namespace internal 6062 } // namespace internal
5987 } // namespace v8 6063 } // namespace v8
5988 6064
5989 #endif // USE_SIMULATOR 6065 #endif // USE_SIMULATOR
5990 6066
5991 #endif // V8_TARGET_ARCH_ARM 6067 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698