Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2711863002: Implement remaining Boolean SIMD operations on ARM. (Closed)
Patch Set: Fix macro assembler test. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/simulator-arm.h ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 883 matching lines...) Expand 10 before | Expand all | Expand 10 after
894 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); 894 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2);
895 } 895 }
896 896
897 897
898 void Simulator::set_d_register(int dreg, const uint32_t* value) { 898 void Simulator::set_d_register(int dreg, const uint32_t* value) {
899 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); 899 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
900 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); 900 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
901 } 901 }
902 902
903 template <typename T> 903 template <typename T>
904 void Simulator::get_d_register(int dreg, T* value) {
905 DCHECK((dreg >= 0) && (dreg < num_d_registers));
906 memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize);
907 }
908
909 template <typename T>
910 void Simulator::set_d_register(int dreg, const T* value) {
911 DCHECK((dreg >= 0) && (dreg < num_d_registers));
912 memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize);
913 }
914
915 template <typename T>
904 void Simulator::get_q_register(int qreg, T* value) { 916 void Simulator::get_q_register(int qreg, T* value) {
905 DCHECK((qreg >= 0) && (qreg < num_q_registers)); 917 DCHECK((qreg >= 0) && (qreg < num_q_registers));
906 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size); 918 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size);
907 } 919 }
908 920
909 template <typename T> 921 template <typename T>
910 void Simulator::set_q_register(int qreg, const T* value) { 922 void Simulator::set_q_register(int qreg, const T* value) {
911 DCHECK((qreg >= 0) && (qreg < num_q_registers)); 923 DCHECK((qreg >= 0) && (qreg < num_q_registers));
912 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size); 924 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
913 } 925 }
914 926
915
916 // Raw access to the PC register. 927 // Raw access to the PC register.
917 void Simulator::set_pc(int32_t value) { 928 void Simulator::set_pc(int32_t value) {
918 pc_modified_ = true; 929 pc_modified_ = true;
919 registers_[pc] = value; 930 registers_[pc] = value;
920 } 931 }
921 932
922 933
923 bool Simulator::has_bad_pc() const { 934 bool Simulator::has_bad_pc() const {
924 return ((registers_[pc] == bad_lr) || (registers_[pc] == end_sim_pc)); 935 return ((registers_[pc] == bad_lr) || (registers_[pc] == end_sim_pc));
925 } 936 }
(...skipping 3074 matching lines...) Expand 10 before | Expand all | Expand 10 after
4000 template <typename T> 4011 template <typename T>
4001 T Clamp(int64_t value) { 4012 T Clamp(int64_t value) {
4002 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); 4013 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
4003 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); 4014 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min());
4004 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); 4015 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max());
4005 int64_t clamped = std::max(min, std::min(max, value)); 4016 int64_t clamped = std::max(min, std::min(max, value));
4006 return static_cast<T>(clamped); 4017 return static_cast<T>(clamped);
4007 } 4018 }
4008 4019
4009 template <typename T> 4020 template <typename T>
4021 T MinMax(T a, T b, bool is_min) {
4022 return is_min ? std::min(a, b) : std::max(a, b);
4023 }
4024
4025 template <typename T>
4010 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4026 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4011 static const int kLanes = 16 / sizeof(T); 4027 static const int kLanes = 16 / sizeof(T);
4012 T src1[kLanes], src2[kLanes]; 4028 T src1[kLanes], src2[kLanes];
4013 simulator->get_q_register(Vn, src1); 4029 simulator->get_q_register(Vn, src1);
4014 simulator->get_q_register(Vm, src2); 4030 simulator->get_q_register(Vm, src2);
4015 for (int i = 0; i < kLanes; i++) { 4031 for (int i = 0; i < kLanes; i++) {
4016 src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i])); 4032 src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i]));
4017 } 4033 }
4018 simulator->set_q_register(Vd, src1); 4034 simulator->set_q_register(Vd, src1);
4019 } 4035 }
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after
4173 case 0x6: { 4189 case 0x6: {
4174 // vmin/vmax.s<size> Qd, Qm, Qn. 4190 // vmin/vmax.s<size> Qd, Qm, Qn.
4175 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4191 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4176 bool min = instr->Bit(4) != 0; 4192 bool min = instr->Bit(4) != 0;
4177 switch (size) { 4193 switch (size) {
4178 case Neon8: { 4194 case Neon8: {
4179 int8_t src1[16], src2[16]; 4195 int8_t src1[16], src2[16];
4180 get_q_register(Vn, src1); 4196 get_q_register(Vn, src1);
4181 get_q_register(Vm, src2); 4197 get_q_register(Vm, src2);
4182 for (int i = 0; i < 16; i++) { 4198 for (int i = 0; i < 16; i++) {
4183 if (min) 4199 src1[i] = MinMax(src1[i], src2[i], min);
4184 src1[i] = std::min(src1[i], src2[i]);
4185 else
4186 src1[i] = std::max(src1[i], src2[i]);
4187 } 4200 }
4188 set_q_register(Vd, src1); 4201 set_q_register(Vd, src1);
4189 break; 4202 break;
4190 } 4203 }
4191 case Neon16: { 4204 case Neon16: {
4192 int16_t src1[8], src2[8]; 4205 int16_t src1[8], src2[8];
4193 get_q_register(Vn, src1); 4206 get_q_register(Vn, src1);
4194 get_q_register(Vm, src2); 4207 get_q_register(Vm, src2);
4195 for (int i = 0; i < 8; i++) { 4208 for (int i = 0; i < 8; i++) {
4196 if (min) 4209 src1[i] = MinMax(src1[i], src2[i], min);
4197 src1[i] = std::min(src1[i], src2[i]);
4198 else
4199 src1[i] = std::max(src1[i], src2[i]);
4200 } 4210 }
4201 set_q_register(Vd, src1); 4211 set_q_register(Vd, src1);
4202 break; 4212 break;
4203 } 4213 }
4204 case Neon32: { 4214 case Neon32: {
4205 int32_t src1[4], src2[4]; 4215 int32_t src1[4], src2[4];
4206 get_q_register(Vn, src1); 4216 get_q_register(Vn, src1);
4207 get_q_register(Vm, src2); 4217 get_q_register(Vm, src2);
4208 for (int i = 0; i < 4; i++) { 4218 for (int i = 0; i < 4; i++) {
4209 if (min) 4219 src1[i] = MinMax(src1[i], src2[i], min);
4210 src1[i] = std::min(src1[i], src2[i]);
4211 else
4212 src1[i] = std::max(src1[i], src2[i]);
4213 } 4220 }
4214 set_q_register(Vd, src1); 4221 set_q_register(Vd, src1);
4215 break; 4222 break;
4216 } 4223 }
4217 default: 4224 default:
4218 UNREACHABLE(); 4225 UNREACHABLE();
4219 break; 4226 break;
4220 } 4227 }
4221 break; 4228 break;
4222 } 4229 }
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
4337 } 4344 }
4338 default: 4345 default:
4339 UNREACHABLE(); 4346 UNREACHABLE();
4340 break; 4347 break;
4341 } 4348 }
4342 } else { 4349 } else {
4343 UNIMPLEMENTED(); 4350 UNIMPLEMENTED();
4344 } 4351 }
4345 break; 4352 break;
4346 } 4353 }
4354 case 0xa: {
4355 // vpmin/vpmax.s<size> Dd, Dm, Dn.
4356 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4357 bool min = instr->Bit(4) != 0;
4358 switch (size) {
4359 case Neon8: {
4360 int8_t dst[8], src1[8], src2[8];
4361 get_d_register(Vn, src1);
4362 get_d_register(Vm, src2);
4363 for (int i = 0; i < 4; i++) {
4364 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4365 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4366 }
4367 set_d_register(Vd, dst);
4368 break;
4369 }
4370 case Neon16: {
4371 int16_t dst[4], src1[4], src2[4];
4372 get_d_register(Vn, src1);
4373 get_d_register(Vm, src2);
4374 for (int i = 0; i < 2; i++) {
4375 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4376 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4377 }
4378 set_d_register(Vd, dst);
4379 break;
4380 }
4381 case Neon32: {
4382 int32_t dst[2], src1[2], src2[2];
4383 get_d_register(Vn, src1);
4384 get_d_register(Vm, src2);
4385 dst[0] = MinMax(src1[0], src1[1], min);
4386 dst[1] = MinMax(src2[0], src2[1], min);
4387 set_d_register(Vd, dst);
4388 break;
4389 }
4390 default:
4391 UNREACHABLE();
4392 break;
4393 }
4394 break;
4395 }
4347 case 0xd: { 4396 case 0xd: {
4348 if (instr->Bit(4) == 0) { 4397 if (instr->Bit(4) == 0) {
4349 float src1[4], src2[4]; 4398 float src1[4], src2[4];
4350 get_q_register(Vn, src1); 4399 get_q_register(Vn, src1);
4351 get_q_register(Vm, src2); 4400 get_q_register(Vm, src2);
4352 for (int i = 0; i < 4; i++) { 4401 for (int i = 0; i < 4; i++) {
4353 if (instr->Bit(21) == 0) { 4402 if (instr->Bit(21) == 0) {
4354 // vadd.f32 Qd, Qm, Qn. 4403 // vadd.f32 Qd, Qm, Qn.
4355 src1[i] = src1[i] + src2[i]; 4404 src1[i] = src1[i] + src2[i];
4356 } else { 4405 } else {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
4391 for (int i = 0; i < 4; i++) { 4440 for (int i = 0; i < 4; i++) {
4392 src1[i] = 2.0f - src1[i] * src2[i]; 4441 src1[i] = 2.0f - src1[i] * src2[i];
4393 } 4442 }
4394 } else { 4443 } else {
4395 // vrsqrts.f32 Qd, Qm, Qn. 4444 // vrsqrts.f32 Qd, Qm, Qn.
4396 for (int i = 0; i < 4; i++) { 4445 for (int i = 0; i < 4; i++) {
4397 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; 4446 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
4398 } 4447 }
4399 } 4448 }
4400 } else { 4449 } else {
4401 if (instr->Bit(21) == 1) { 4450 // vmin/vmax.f32 Qd, Qm, Qn.
4402 // vmin.f32 Qd, Qm, Qn. 4451 bool min = instr->Bit(21) == 1;
4403 for (int i = 0; i < 4; i++) { 4452 for (int i = 0; i < 4; i++) {
4404 src1[i] = std::min(src1[i], src2[i]); 4453 src1[i] = MinMax(src1[i], src2[i], min);
4405 }
4406 } else {
4407 // vmax.f32 Qd, Qm, Qn.
4408 for (int i = 0; i < 4; i++) {
4409 src1[i] = std::max(src1[i], src2[i]);
4410 }
4411 } 4454 }
4412 } 4455 }
4413 set_q_register(Vd, src1); 4456 set_q_register(Vd, src1);
4414 } else { 4457 } else {
4415 UNIMPLEMENTED(); 4458 UNIMPLEMENTED();
4416 } 4459 }
4417 break; 4460 break;
4418 } 4461 }
4419 default: 4462 default:
4420 UNIMPLEMENTED(); 4463 UNIMPLEMENTED();
(...skipping 265 matching lines...) Expand 10 before | Expand all | Expand 10 after
4686 case 0x6: { 4729 case 0x6: {
4687 // vmin/vmax.u<size> Qd, Qm, Qn. 4730 // vmin/vmax.u<size> Qd, Qm, Qn.
4688 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4731 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4689 bool min = instr->Bit(4) != 0; 4732 bool min = instr->Bit(4) != 0;
4690 switch (size) { 4733 switch (size) {
4691 case Neon8: { 4734 case Neon8: {
4692 uint8_t src1[16], src2[16]; 4735 uint8_t src1[16], src2[16];
4693 get_q_register(Vn, src1); 4736 get_q_register(Vn, src1);
4694 get_q_register(Vm, src2); 4737 get_q_register(Vm, src2);
4695 for (int i = 0; i < 16; i++) { 4738 for (int i = 0; i < 16; i++) {
4696 if (min) 4739 src1[i] = MinMax(src1[i], src2[i], min);
4697 src1[i] = std::min(src1[i], src2[i]);
4698 else
4699 src1[i] = std::max(src1[i], src2[i]);
4700 } 4740 }
4701 set_q_register(Vd, src1); 4741 set_q_register(Vd, src1);
4702 break; 4742 break;
4703 } 4743 }
4704 case Neon16: { 4744 case Neon16: {
4705 uint16_t src1[8], src2[8]; 4745 uint16_t src1[8], src2[8];
4706 get_q_register(Vn, src1); 4746 get_q_register(Vn, src1);
4707 get_q_register(Vm, src2); 4747 get_q_register(Vm, src2);
4708 for (int i = 0; i < 8; i++) { 4748 for (int i = 0; i < 8; i++) {
4709 if (min) 4749 src1[i] = MinMax(src1[i], src2[i], min);
4710 src1[i] = std::min(src1[i], src2[i]);
4711 else
4712 src1[i] = std::max(src1[i], src2[i]);
4713 } 4750 }
4714 set_q_register(Vd, src1); 4751 set_q_register(Vd, src1);
4715 break; 4752 break;
4716 } 4753 }
4717 case Neon32: { 4754 case Neon32: {
4718 uint32_t src1[4], src2[4]; 4755 uint32_t src1[4], src2[4];
4719 get_q_register(Vn, src1); 4756 get_q_register(Vn, src1);
4720 get_q_register(Vm, src2); 4757 get_q_register(Vm, src2);
4721 for (int i = 0; i < 4; i++) { 4758 for (int i = 0; i < 4; i++) {
4722 if (min) 4759 src1[i] = MinMax(src1[i], src2[i], min);
4723 src1[i] = std::min(src1[i], src2[i]);
4724 else
4725 src1[i] = std::max(src1[i], src2[i]);
4726 } 4760 }
4727 set_q_register(Vd, src1); 4761 set_q_register(Vd, src1);
4728 break; 4762 break;
4729 } 4763 }
4730 default: 4764 default:
4731 UNREACHABLE(); 4765 UNREACHABLE();
4732 break; 4766 break;
4733 } 4767 }
4734 break; 4768 break;
4735 } 4769 }
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
4806 set_q_register(Vd, src1); 4840 set_q_register(Vd, src1);
4807 break; 4841 break;
4808 } 4842 }
4809 default: 4843 default:
4810 UNREACHABLE(); 4844 UNREACHABLE();
4811 break; 4845 break;
4812 } 4846 }
4813 } 4847 }
4814 break; 4848 break;
4815 } 4849 }
4850 case 0xa: {
4851 // vpmin/vpmax.u<size> Dd, Dm, Dn.
4852 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4853 bool min = instr->Bit(4) != 0;
4854 switch (size) {
4855 case Neon8: {
4856 uint8_t dst[8], src1[8], src2[8];
4857 get_d_register(Vn, src1);
4858 get_d_register(Vm, src2);
4859 for (int i = 0; i < 4; i++) {
4860 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4861 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4862 }
4863 set_d_register(Vd, dst);
4864 break;
4865 }
4866 case Neon16: {
4867 uint16_t dst[4], src1[4], src2[4];
4868 get_d_register(Vn, src1);
4869 get_d_register(Vm, src2);
4870 for (int i = 0; i < 2; i++) {
4871 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4872 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4873 }
4874 set_d_register(Vd, dst);
4875 break;
4876 }
4877 case Neon32: {
4878 uint32_t dst[2], src1[2], src2[2];
4879 get_d_register(Vn, src1);
4880 get_d_register(Vm, src2);
4881 dst[0] = MinMax(src1[0], src1[1], min);
4882 dst[1] = MinMax(src2[0], src2[1], min);
4883 set_d_register(Vd, dst);
4884 break;
4885 }
4886 default:
4887 UNREACHABLE();
4888 break;
4889 }
4890 break;
4891 }
4816 case 0xd: { 4892 case 0xd: {
4817 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4893 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4818 // vmul.f32 Qd, Qn, Qm 4894 // vmul.f32 Qd, Qn, Qm
4819 float src1[4], src2[4]; 4895 float src1[4], src2[4];
4820 get_q_register(Vn, src1); 4896 get_q_register(Vn, src1);
4821 get_q_register(Vm, src2); 4897 get_q_register(Vm, src2);
4822 for (int i = 0; i < 4; i++) { 4898 for (int i = 0; i < 4; i++) {
4823 src1[i] = src1[i] * src2[i]; 4899 src1[i] = src1[i] * src2[i];
4824 } 4900 }
4825 set_q_register(Vd, src1); 4901 set_q_register(Vd, src1);
(...skipping 1157 matching lines...) Expand 10 before | Expand all | Expand 10 after
5983 processor->prev_ = nullptr; 6059 processor->prev_ = nullptr;
5984 processor->next_ = nullptr; 6060 processor->next_ = nullptr;
5985 } 6061 }
5986 6062
5987 } // namespace internal 6063 } // namespace internal
5988 } // namespace v8 6064 } // namespace v8
5989 6065
5990 #endif // USE_SIMULATOR 6066 #endif // USE_SIMULATOR
5991 6067
5992 #endif // V8_TARGET_ARCH_ARM 6068 #endif // V8_TARGET_ARCH_ARM
OLD | NEW
« no previous file with comments | « src/arm/simulator-arm.h ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698