Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1598)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)
Patch Set: Rebase, reformat. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 4260 matching lines...) Expand 10 before | Expand all | Expand 10 after
4271 T dst[kElems], src1[kElems], src2[kElems]; 4271 T dst[kElems], src1[kElems], src2[kElems];
4272 simulator->get_neon_register<T, kDoubleSize>(Vn, src1); 4272 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
4273 simulator->get_neon_register<T, kDoubleSize>(Vm, src2); 4273 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
4274 for (int i = 0; i < kPairs; i++) { 4274 for (int i = 0; i < kPairs; i++) {
4275 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min); 4275 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4276 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min); 4276 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4277 } 4277 }
4278 simulator->set_neon_register<T, kDoubleSize>(Vd, dst); 4278 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
4279 } 4279 }
4280 4280
4281 template <typename T>
4282 void PairwiseAdd(Simulator* simulator, int Vd, int Vm, int Vn) {
4283 static const int kElems = kDoubleSize / sizeof(T);
4284 static const int kPairs = kElems / 2;
4285 T dst[kElems], src1[kElems], src2[kElems];
4286 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
4287 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
4288 for (int i = 0; i < kPairs; i++) {
4289 dst[i] = src1[i * 2] + src1[i * 2 + 1];
4290 dst[i + kPairs] = src2[i * 2] + src2[i * 2 + 1];
4291 }
4292 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
4293 }
4294
4281 void Simulator::DecodeSpecialCondition(Instruction* instr) { 4295 void Simulator::DecodeSpecialCondition(Instruction* instr) {
4282 switch (instr->SpecialValue()) { 4296 switch (instr->SpecialValue()) {
4283 case 4: { 4297 case 4: {
4284 int Vd, Vm, Vn; 4298 int Vd, Vm, Vn;
4285 if (instr->Bit(6) == 0) { 4299 if (instr->Bit(6) == 0) {
4286 Vd = instr->VFPDRegValue(kDoublePrecision); 4300 Vd = instr->VFPDRegValue(kDoublePrecision);
4287 Vm = instr->VFPMRegValue(kDoublePrecision); 4301 Vm = instr->VFPMRegValue(kDoublePrecision);
4288 Vn = instr->VFPNRegValue(kDoublePrecision); 4302 Vn = instr->VFPNRegValue(kDoublePrecision);
4289 } else { 4303 } else {
4290 Vd = instr->VFPDRegValue(kSimd128Precision); 4304 Vd = instr->VFPDRegValue(kSimd128Precision);
(...skipping 191 matching lines...) Expand 10 before | Expand all | Expand 10 after
4482 break; 4496 break;
4483 case Neon32: 4497 case Neon32:
4484 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min); 4498 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);
4485 break; 4499 break;
4486 default: 4500 default:
4487 UNREACHABLE(); 4501 UNREACHABLE();
4488 break; 4502 break;
4489 } 4503 }
4490 break; 4504 break;
4491 } 4505 }
4506 case 0xb: {
4507 // vpadd.i<size> Dd, Dn, Dm.
4508 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4509 switch (size) {
4510 case Neon8:
4511 PairwiseAdd<int8_t>(this, Vd, Vm, Vn);
4512 break;
4513 case Neon16:
4514 PairwiseAdd<int16_t>(this, Vd, Vm, Vn);
4515 break;
4516 case Neon32:
4517 PairwiseAdd<int32_t>(this, Vd, Vm, Vn);
4518 break;
4519 default:
4520 UNREACHABLE();
4521 break;
4522 }
4523 break;
4524 }
4492 case 0xd: { 4525 case 0xd: {
4493 if (instr->Bit(4) == 0) { 4526 if (instr->Bit(4) == 0) {
4494 float src1[4], src2[4]; 4527 float src1[4], src2[4];
4495 get_neon_register(Vn, src1); 4528 get_neon_register(Vn, src1);
4496 get_neon_register(Vm, src2); 4529 get_neon_register(Vm, src2);
4497 for (int i = 0; i < 4; i++) { 4530 for (int i = 0; i < 4; i++) {
4498 if (instr->Bit(21) == 0) { 4531 if (instr->Bit(21) == 0) {
4499 // vadd.f32 Qd, Qm, Qn. 4532 // vadd.f32 Qd, Qm, Qn.
4500 src1[i] = src1[i] + src2[i]; 4533 src1[i] = src1[i] + src2[i];
4501 } else { 4534 } else {
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after
4830 case Neon32: 4863 case Neon32:
4831 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min); 4864 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);
4832 break; 4865 break;
4833 default: 4866 default:
4834 UNREACHABLE(); 4867 UNREACHABLE();
4835 break; 4868 break;
4836 } 4869 }
4837 break; 4870 break;
4838 } 4871 }
4839 case 0xd: { 4872 case 0xd: {
4840 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4873 if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
4874 instr->Bit(4) == 1) {
4841 // vmul.f32 Qd, Qn, Qm 4875 // vmul.f32 Qd, Qn, Qm
4842 float src1[4], src2[4]; 4876 float src1[4], src2[4];
4843 get_neon_register(Vn, src1); 4877 get_neon_register(Vn, src1);
4844 get_neon_register(Vm, src2); 4878 get_neon_register(Vm, src2);
4845 for (int i = 0; i < 4; i++) { 4879 for (int i = 0; i < 4; i++) {
4846 src1[i] = src1[i] * src2[i]; 4880 src1[i] = src1[i] * src2[i];
4847 } 4881 }
4848 set_neon_register(Vd, src1); 4882 set_neon_register(Vd, src1);
4883 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 &&
4884 instr->Bit(4) == 0) {
4885 // vpadd.f32 Dd, Dn, Dm
4886 PairwiseAdd<float>(this, Vd, Vm, Vn);
4849 } else { 4887 } else {
4850 UNIMPLEMENTED(); 4888 UNIMPLEMENTED();
4851 } 4889 }
4852 break; 4890 break;
4853 } 4891 }
4854 case 0xe: { 4892 case 0xe: {
4855 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { 4893 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
4856 // vcge/vcgt.f32 Qd, Qm, Qn 4894 // vcge/vcgt.f32 Qd, Qm, Qn
4857 bool ge = instr->Bit(21) == 0; 4895 bool ge = instr->Bit(21) == 0;
4858 float src1[4], src2[4]; 4896 float src1[4], src2[4];
(...skipping 1190 matching lines...) Expand 10 before | Expand all | Expand 10 after
6049 processor->prev_ = nullptr; 6087 processor->prev_ = nullptr;
6050 processor->next_ = nullptr; 6088 processor->next_ = nullptr;
6051 } 6089 }
6052 6090
6053 } // namespace internal 6091 } // namespace internal
6054 } // namespace v8 6092 } // namespace v8
6055 6093
6056 #endif // USE_SIMULATOR 6094 #endif // USE_SIMULATOR
6057 6095
6058 #endif // V8_TARGET_ARCH_ARM 6096 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698