Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(381)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)
Patch Set: Fix MIPS. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 4255 matching lines...) Expand 10 before | Expand all | Expand 10 after
4266 T dst[kElems], src1[kElems], src2[kElems]; 4266 T dst[kElems], src1[kElems], src2[kElems];
4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1); 4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2); 4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
4269 for (int i = 0; i < kPairs; i++) { 4269 for (int i = 0; i < kPairs; i++) {
4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min); 4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min); 4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4272 } 4272 }
4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst); 4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
4274 } 4274 }
4275 4275
4276 template <typename T>
4277 void PairwiseAdd(Simulator* simulator, int Vd, int Vm, int Vn) {
4278 static const int kElems = kDoubleSize / sizeof(T);
4279 static const int kPairs = kElems / 2;
4280 T dst[kElems], src1[kElems], src2[kElems];
4281 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
4282 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
4283 for (int i = 0; i < kPairs; i++) {
4284 dst[i] = src1[i * 2] + src1[i * 2 + 1];
4285 dst[i + kPairs] = src2[i * 2] + src2[i * 2 + 1];
4286 }
4287 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
4288 }
4289
4276 void Simulator::DecodeSpecialCondition(Instruction* instr) { 4290 void Simulator::DecodeSpecialCondition(Instruction* instr) {
4277 switch (instr->SpecialValue()) { 4291 switch (instr->SpecialValue()) {
4278 case 4: { 4292 case 4: {
4279 int Vd, Vm, Vn; 4293 int Vd, Vm, Vn;
4280 if (instr->Bit(6) == 0) { 4294 if (instr->Bit(6) == 0) {
4281 Vd = instr->VFPDRegValue(kDoublePrecision); 4295 Vd = instr->VFPDRegValue(kDoublePrecision);
4282 Vm = instr->VFPMRegValue(kDoublePrecision); 4296 Vm = instr->VFPMRegValue(kDoublePrecision);
4283 Vn = instr->VFPNRegValue(kDoublePrecision); 4297 Vn = instr->VFPNRegValue(kDoublePrecision);
4284 } else { 4298 } else {
4285 Vd = instr->VFPDRegValue(kSimd128Precision); 4299 Vd = instr->VFPDRegValue(kSimd128Precision);
(...skipping 191 matching lines...) Expand 10 before | Expand all | Expand 10 after
4477 break; 4491 break;
4478 case Neon32: 4492 case Neon32:
4479 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min); 4493 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);
4480 break; 4494 break;
4481 default: 4495 default:
4482 UNREACHABLE(); 4496 UNREACHABLE();
4483 break; 4497 break;
4484 } 4498 }
4485 break; 4499 break;
4486 } 4500 }
4501 case 0xb: {
4502 // vpadd.i<size> Dd, Dn, Dm.
4503 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4504 switch (size) {
4505 case Neon8:
4506 PairwiseAdd<int8_t>(this, Vd, Vm, Vn);
4507 break;
4508 case Neon16:
4509 PairwiseAdd<int16_t>(this, Vd, Vm, Vn);
4510 break;
4511 case Neon32:
4512 PairwiseAdd<int32_t>(this, Vd, Vm, Vn);
4513 break;
4514 default:
4515 UNREACHABLE();
4516 break;
4517 }
4518 break;
4519 }
4487 case 0xd: { 4520 case 0xd: {
4488 if (instr->Bit(4) == 0) { 4521 if (instr->Bit(4) == 0) {
4489 float src1[4], src2[4]; 4522 float src1[4], src2[4];
4490 get_neon_register(Vn, src1); 4523 get_neon_register(Vn, src1);
4491 get_neon_register(Vm, src2); 4524 get_neon_register(Vm, src2);
4492 for (int i = 0; i < 4; i++) { 4525 for (int i = 0; i < 4; i++) {
4493 if (instr->Bit(21) == 0) { 4526 if (instr->Bit(21) == 0) {
4494 // vadd.f32 Qd, Qm, Qn. 4527 // vadd.f32 Qd, Qm, Qn.
4495 src1[i] = src1[i] + src2[i]; 4528 src1[i] = src1[i] + src2[i];
4496 } else { 4529 } else {
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after
4825 case Neon32: 4858 case Neon32:
4826 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min); 4859 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);
4827 break; 4860 break;
4828 default: 4861 default:
4829 UNREACHABLE(); 4862 UNREACHABLE();
4830 break; 4863 break;
4831 } 4864 }
4832 break; 4865 break;
4833 } 4866 }
4834 case 0xd: { 4867 case 0xd: {
4835 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4868 if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
4869 instr->Bit(4) == 1) {
4836 // vmul.f32 Qd, Qn, Qm 4870 // vmul.f32 Qd, Qn, Qm
4837 float src1[4], src2[4]; 4871 float src1[4], src2[4];
4838 get_neon_register(Vn, src1); 4872 get_neon_register(Vn, src1);
4839 get_neon_register(Vm, src2); 4873 get_neon_register(Vm, src2);
4840 for (int i = 0; i < 4; i++) { 4874 for (int i = 0; i < 4; i++) {
4841 src1[i] = src1[i] * src2[i]; 4875 src1[i] = src1[i] * src2[i];
4842 } 4876 }
4843 set_neon_register(Vd, src1); 4877 set_neon_register(Vd, src1);
4878 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 &&
4879 instr->Bit(4) == 0) {
4880 // vpadd.f32 Dd, Dn, Dm
4881 PairwiseAdd<float>(this, Vd, Vm, Vn);
4844 } else { 4882 } else {
4845 UNIMPLEMENTED(); 4883 UNIMPLEMENTED();
4846 } 4884 }
4847 break; 4885 break;
4848 } 4886 }
4849 case 0xe: { 4887 case 0xe: {
4850 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { 4888 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
4851 // vcge/vcgt.f32 Qd, Qm, Qn 4889 // vcge/vcgt.f32 Qd, Qm, Qn
4852 bool ge = instr->Bit(21) == 0; 4890 bool ge = instr->Bit(21) == 0;
4853 float src1[4], src2[4]; 4891 float src1[4], src2[4];
(...skipping 1190 matching lines...) Expand 10 before | Expand all | Expand 10 after
6044 processor->prev_ = nullptr; 6082 processor->prev_ = nullptr;
6045 processor->next_ = nullptr; 6083 processor->next_ = nullptr;
6046 } 6084 }
6047 6085
6048 } // namespace internal 6086 } // namespace internal
6049 } // namespace v8 6087 } // namespace v8
6050 6088
6051 #endif // USE_SIMULATOR 6089 #endif // USE_SIMULATOR
6052 6090
6053 #endif // V8_TARGET_ARCH_ARM 6091 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698