Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(120)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2868603002: [ARM] Improve VFP register moves. (Closed)
Patch Set: Rebase. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 4205 matching lines...) Expand 10 before | Expand all | Expand 10 after
4216 static const int kElems = SIZE / sizeof(T); 4216 static const int kElems = SIZE / sizeof(T);
4217 T src[kElems]; 4217 T src[kElems];
4218 simulator->get_neon_register<T, SIZE>(Vm, src); 4218 simulator->get_neon_register<T, SIZE>(Vm, src);
4219 for (int i = 0; i < kElems; i++) { 4219 for (int i = 0; i < kElems; i++) {
4220 src[i] = ArithmeticShiftRight(src[i], shift); 4220 src[i] = ArithmeticShiftRight(src[i], shift);
4221 } 4221 }
4222 simulator->set_neon_register<T, SIZE>(Vd, src); 4222 simulator->set_neon_register<T, SIZE>(Vd, src);
4223 } 4223 }
4224 4224
4225 template <typename T, int SIZE> 4225 template <typename T, int SIZE>
4226 void ShiftLeftAndInsert(Simulator* simulator, int Vd, int Vm, int shift) {
4227 static const int kElems = SIZE / sizeof(T);
4228 T src[kElems];
4229 T dst[kElems];
4230 simulator->get_neon_register<T, SIZE>(Vm, src);
4231 simulator->get_neon_register<T, SIZE>(Vd, dst);
4232 uint64_t mask = (1llu << shift) - 1llu;
4233 for (int i = 0; i < kElems; i++) {
4234 dst[i] = (src[i] << shift) | (dst[i] & mask);
4235 }
4236 simulator->set_neon_register<T, SIZE>(Vd, dst);
4237 }
4238
4239 template <typename T, int SIZE>
4240 void ShiftRightAndInsert(Simulator* simulator, int Vd, int Vm, int shift) {
4241 static const int kElems = SIZE / sizeof(T);
4242 T src[kElems];
4243 T dst[kElems];
4244 simulator->get_neon_register<T, SIZE>(Vm, src);
4245 simulator->get_neon_register<T, SIZE>(Vd, dst);
4246 uint64_t mask = ~((1llu << (kBitsPerByte * SIZE - shift)) - 1llu);
4247 for (int i = 0; i < kElems; i++) {
4248 dst[i] = (src[i] >> shift) | (dst[i] & mask);
4249 }
4250 simulator->set_neon_register<T, SIZE>(Vd, dst);
4251 }
4252
4253 template <typename T, int SIZE>
4226 void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) { 4254 void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) {
4227 static const int kElems = SIZE / sizeof(T); 4255 static const int kElems = SIZE / sizeof(T);
4228 T src1[kElems], src2[kElems]; 4256 T src1[kElems], src2[kElems];
4229 simulator->get_neon_register<T, SIZE>(Vn, src1); 4257 simulator->get_neon_register<T, SIZE>(Vn, src1);
4230 simulator->get_neon_register<T, SIZE>(Vm, src2); 4258 simulator->get_neon_register<T, SIZE>(Vm, src2);
4231 for (int i = 0; i < kElems; i++) { 4259 for (int i = 0; i < kElems; i++) {
4232 src1[i] = src1[i] == src2[i] ? -1 : 0; 4260 src1[i] = src1[i] == src2[i] ? -1 : 0;
4233 } 4261 }
4234 simulator->set_neon_register<T, SIZE>(Vd, src1); 4262 simulator->set_neon_register<T, SIZE>(Vd, src1);
4235 } 4263 }
(...skipping 752 matching lines...) Expand 10 before | Expand all | Expand 10 after
4988 // vswp Qd, Qm. 5016 // vswp Qd, Qm.
4989 uint32_t dval[4], mval[4]; 5017 uint32_t dval[4], mval[4];
4990 int vd = instr->VFPDRegValue(kSimd128Precision); 5018 int vd = instr->VFPDRegValue(kSimd128Precision);
4991 int vm = instr->VFPMRegValue(kSimd128Precision); 5019 int vm = instr->VFPMRegValue(kSimd128Precision);
4992 get_neon_register(vd, dval); 5020 get_neon_register(vd, dval);
4993 get_neon_register(vm, mval); 5021 get_neon_register(vm, mval);
4994 set_neon_register(vm, dval); 5022 set_neon_register(vm, dval);
4995 set_neon_register(vd, mval); 5023 set_neon_register(vd, mval);
4996 } 5024 }
4997 } else if (instr->Bits(11, 7) == 0x18) { 5025 } else if (instr->Bits(11, 7) == 0x18) {
4998 // vdup.32 Qd, Sm. 5026 // vdup.<size> Dd, Dm[index].
4999 int vd = instr->VFPDRegValue(kSimd128Precision); 5027 // vdup.<size> Qd, Dm[index].
5000 int vm = instr->VFPMRegValue(kDoublePrecision); 5028 int vm = instr->VFPMRegValue(kDoublePrecision);
5001 int index = instr->Bit(19); 5029 int imm4 = instr->Bits(19, 16);
5002 uint32_t s_data = get_s_register(vm * 2 + index); 5030 int size = 0, index = 0, mask = 0;
5003 uint32_t q_data[4]; 5031 if ((imm4 & 0x1) != 0) {
5004 for (int i = 0; i < 4; i++) q_data[i] = s_data; 5032 size = 8;
5005 set_neon_register(vd, q_data); 5033 index = imm4 >> 1;
5034 mask = 0xffu;
5035 } else if ((imm4 & 0x2) != 0) {
5036 size = 16;
5037 index = imm4 >> 2;
5038 mask = 0xffffu;
5039 } else {
5040 size = 32;
5041 index = imm4 >> 3;
5042 mask = 0xffffffffu;
5043 }
5044 uint64_t d_data;
5045 get_d_register(vm, &d_data);
5046 uint32_t scalar = (d_data >> (size * index)) & mask;
5047 uint32_t duped = scalar;
5048 for (int i = 1; i < 32 / size; i++) {
5049 scalar <<= size;
5050 duped |= scalar;
5051 }
5052 uint32_t result[4] = {duped, duped, duped, duped};
5053 if (instr->Bit(6) == 0) {
5054 int vd = instr->VFPDRegValue(kDoublePrecision);
5055 set_d_register(vd, result);
5056 } else {
5057 int vd = instr->VFPDRegValue(kSimd128Precision);
5058 set_neon_register(vd, result);
5059 }
5006 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { 5060 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
5007 // vmvn Qd, Qm. 5061 // vmvn Qd, Qm.
5008 int vd = instr->VFPDRegValue(kSimd128Precision); 5062 int vd = instr->VFPDRegValue(kSimd128Precision);
5009 int vm = instr->VFPMRegValue(kSimd128Precision); 5063 int vm = instr->VFPMRegValue(kSimd128Precision);
5010 uint32_t q_data[4]; 5064 uint32_t q_data[4];
5011 get_neon_register(vm, q_data); 5065 get_neon_register(vm, q_data);
5012 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; 5066 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
5013 set_neon_register(vd, q_data); 5067 set_neon_register(vd, q_data);
5014 } else if (instr->Bits(11, 10) == 0x2) { 5068 } else if (instr->Bits(11, 10) == 0x2) {
5015 // vtb[l,x] Dd, <list>, Dm. 5069 // vtb[l,x] Dd, <list>, Dm.
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after
5372 case Neon16: 5426 case Neon16:
5373 ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift); 5427 ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
5374 break; 5428 break;
5375 case Neon32: 5429 case Neon32:
5376 ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift); 5430 ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
5377 break; 5431 break;
5378 default: 5432 default:
5379 UNREACHABLE(); 5433 UNREACHABLE();
5380 break; 5434 break;
5381 } 5435 }
5436 } else if (instr->Bits(11, 8) == 0x5 && instr->Bit(6) == 0 &&
5437 instr->Bit(4) == 1) {
5438 // vsli.<size> Dd, Dm, shift
5439 int imm7 = instr->Bits(21, 16);
5440 if (instr->Bit(7) != 0) imm7 += 64;
5441 int size = base::bits::RoundDownToPowerOfTwo32(imm7);
5442 int shift = imm7 - size;
5443 int Vd = instr->VFPDRegValue(kDoublePrecision);
5444 int Vm = instr->VFPMRegValue(kDoublePrecision);
5445 switch (size) {
5446 case 8:
5447 ShiftLeftAndInsert<uint8_t, kDoubleSize>(this, Vd, Vm, shift);
5448 break;
5449 case 16:
5450 ShiftLeftAndInsert<uint16_t, kDoubleSize>(this, Vd, Vm, shift);
5451 break;
5452 case 32:
5453 ShiftLeftAndInsert<uint32_t, kDoubleSize>(this, Vd, Vm, shift);
5454 break;
5455 case 64:
5456 ShiftLeftAndInsert<uint64_t, kDoubleSize>(this, Vd, Vm, shift);
5457 break;
5458 default:
5459 UNREACHABLE();
5460 break;
5461 }
5462 } else if (instr->Bits(11, 8) == 0x4 && instr->Bit(6) == 0 &&
5463 instr->Bit(4) == 1) {
5464 // vsri.<size> Dd, Dm, shift
5465 int imm7 = instr->Bits(21, 16);
5466 if (instr->Bit(7) != 0) imm7 += 64;
5467 int size = base::bits::RoundDownToPowerOfTwo32(imm7);
5468 int shift = 2 * size - imm7;
5469 int Vd = instr->VFPDRegValue(kDoublePrecision);
5470 int Vm = instr->VFPMRegValue(kDoublePrecision);
5471 switch (size) {
5472 case 8:
5473 ShiftRightAndInsert<uint8_t, kDoubleSize>(this, Vd, Vm, shift);
5474 break;
5475 case 16:
5476 ShiftRightAndInsert<uint16_t, kDoubleSize>(this, Vd, Vm, shift);
5477 break;
5478 case 32:
5479 ShiftRightAndInsert<uint32_t, kDoubleSize>(this, Vd, Vm, shift);
5480 break;
5481 case 64:
5482 ShiftRightAndInsert<uint64_t, kDoubleSize>(this, Vd, Vm, shift);
5483 break;
5484 default:
5485 UNREACHABLE();
5486 break;
5487 }
5382 } else { 5488 } else {
5383 UNIMPLEMENTED(); 5489 UNIMPLEMENTED();
5384 } 5490 }
5385 break; 5491 break;
5386 case 8: 5492 case 8:
5387 if (instr->Bits(21, 20) == 0) { 5493 if (instr->Bits(21, 20) == 0) {
5388 // vst1 5494 // vst1
5389 int Vd = (instr->Bit(22) << 4) | instr->VdValue(); 5495 int Vd = (instr->Bit(22) << 4) | instr->VdValue();
5390 int Rn = instr->VnValue(); 5496 int Rn = instr->VnValue();
5391 int type = instr->Bits(11, 8); 5497 int type = instr->Bits(11, 8);
(...skipping 697 matching lines...) Expand 10 before | Expand all | Expand 10 after
6089 processor->prev_ = nullptr; 6195 processor->prev_ = nullptr;
6090 processor->next_ = nullptr; 6196 processor->next_ = nullptr;
6091 } 6197 }
6092 6198
6093 } // namespace internal 6199 } // namespace internal
6094 } // namespace v8 6200 } // namespace v8
6095 6201
6096 #endif // USE_SIMULATOR 6202 #endif // USE_SIMULATOR
6097 6203
6098 #endif // V8_TARGET_ARCH_ARM 6204 #endif // V8_TARGET_ARCH_ARM
OLD | NEW
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698