Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(69)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2739033002: [ARM] Implement more NEON permutation instructions. (Closed)
Patch Set: Martyn's review comments. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 5028 matching lines...) Expand 10 before | Expand all | Expand 10 after
5039 uint64_t table; 5039 uint64_t table;
5040 get_d_register(vn + index / kDoubleSize, &table); 5040 get_d_register(vn + index / kDoubleSize, &table);
5041 result |= 5041 result |=
5042 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) 5042 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
5043 << shift; 5043 << shift;
5044 } else if (vtbx) { 5044 } else if (vtbx) {
5045 result |= destination & (0xFFull << shift); 5045 result |= destination & (0xFFull << shift);
5046 } 5046 }
5047 } 5047 }
5048 set_d_register(vd, &result); 5048 set_d_register(vd, &result);
5049 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) { 5049 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
5050 // vzip.<size> Qd, Qm. 5050 instr->Bit(6) == 1) {
5051 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5051 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5052 int Vd = instr->VFPDRegValue(kSimd128Precision); 5052 int Vd = instr->VFPDRegValue(kSimd128Precision);
5053 int Vm = instr->VFPMRegValue(kSimd128Precision); 5053 int Vm = instr->VFPMRegValue(kSimd128Precision);
5054 switch (size) { 5054 if (instr->Bit(7) == 1) {
5055 case Neon8: { 5055 // vzip.<size> Qd, Qm.
5056 uint8_t src1[16], src2[16], dst1[16], dst2[16]; 5056 switch (size) {
5057 get_q_register(Vd, src1); 5057 case Neon8: {
5058 get_q_register(Vm, src2); 5058 uint8_t src1[16], src2[16], dst1[16], dst2[16];
5059 for (int i = 0; i < 8; i++) { 5059 get_q_register(Vd, src1);
5060 dst1[i * 2] = src1[i]; 5060 get_q_register(Vm, src2);
5061 dst1[i * 2 + 1] = src2[i]; 5061 for (int i = 0; i < 8; i++) {
5062 dst2[i * 2] = src1[i + 8]; 5062 dst1[i * 2] = src1[i];
5063 dst2[i * 2 + 1] = src2[i + 8]; 5063 dst1[i * 2 + 1] = src2[i];
5064 dst2[i * 2] = src1[i + 8];
5065 dst2[i * 2 + 1] = src2[i + 8];
5066 }
5067 set_q_register(Vd, dst1);
5068 set_q_register(Vm, dst2);
5069 break;
5064 } 5070 }
5065 set_q_register(Vd, dst1); 5071 case Neon16: {
5066 set_q_register(Vm, dst2); 5072 uint16_t src1[8], src2[8], dst1[8], dst2[8];
5067 break; 5073 get_q_register(Vd, src1);
5074 get_q_register(Vm, src2);
5075 for (int i = 0; i < 4; i++) {
5076 dst1[i * 2] = src1[i];
5077 dst1[i * 2 + 1] = src2[i];
5078 dst2[i * 2] = src1[i + 4];
5079 dst2[i * 2 + 1] = src2[i + 4];
5080 }
5081 set_q_register(Vd, dst1);
5082 set_q_register(Vm, dst2);
5083 break;
5084 }
5085 case Neon32: {
5086 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5087 get_q_register(Vd, src1);
5088 get_q_register(Vm, src2);
5089 for (int i = 0; i < 2; i++) {
5090 dst1[i * 2] = src1[i];
5091 dst1[i * 2 + 1] = src2[i];
5092 dst2[i * 2] = src1[i + 2];
5093 dst2[i * 2 + 1] = src2[i + 2];
5094 }
5095 set_q_register(Vd, dst1);
5096 set_q_register(Vm, dst2);
5097 break;
5098 }
5099 default:
5100 UNREACHABLE();
5101 break;
5068 } 5102 }
5069 case Neon16: { 5103 } else {
5070 uint16_t src1[8], src2[8], dst1[8], dst2[8]; 5104 // vuzp.<size> Qd, Qm.
5071 get_q_register(Vd, src1); 5105 switch (size) {
5072 get_q_register(Vm, src2); 5106 case Neon8: {
5073 for (int i = 0; i < 8; i += 2) { 5107 uint8_t src1[16], src2[16], dst1[16], dst2[16];
5074 dst1[i] = src1[i / 2]; 5108 get_q_register(Vd, src1);
5075 dst1[i + 1] = src2[i / 2]; 5109 get_q_register(Vm, src2);
5076 dst2[i] = src1[i / 2 + 4]; 5110 for (int i = 0; i < 8; i++) {
5077 dst2[i + 1] = src2[i / 2 + 4]; 5111 dst1[i] = src1[i * 2];
5112 dst1[i + 8] = src2[i * 2];
5113 dst2[i] = src1[i * 2 + 1];
5114 dst2[i + 8] = src2[i * 2 + 1];
5115 }
5116 set_q_register(Vd, dst1);
5117 set_q_register(Vm, dst2);
5118 break;
5078 } 5119 }
5079 set_q_register(Vd, dst1); 5120 case Neon16: {
5080 set_q_register(Vm, dst2); 5121 uint16_t src1[8], src2[8], dst1[8], dst2[8];
5081 break; 5122 get_q_register(Vd, src1);
5123 get_q_register(Vm, src2);
5124 for (int i = 0; i < 4; i++) {
5125 dst1[i] = src1[i * 2];
5126 dst1[i + 4] = src2[i * 2];
5127 dst2[i] = src1[i * 2 + 1];
5128 dst2[i + 4] = src2[i * 2 + 1];
5129 }
5130 set_q_register(Vd, dst1);
5131 set_q_register(Vm, dst2);
5132 break;
5133 }
5134 case Neon32: {
5135 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5136 get_q_register(Vd, src1);
5137 get_q_register(Vm, src2);
5138 for (int i = 0; i < 2; i++) {
5139 dst1[i] = src1[i * 2];
5140 dst1[i + 2] = src2[i * 2];
5141 dst2[i] = src1[i * 2 + 1];
5142 dst2[i + 2] = src2[i * 2 + 1];
5143 }
5144 set_q_register(Vd, dst1);
5145 set_q_register(Vm, dst2);
5146 break;
5147 }
5148 default:
5149 UNREACHABLE();
5150 break;
5082 } 5151 }
5083 case Neon32: {
5084 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5085 get_q_register(Vd, src1);
5086 get_q_register(Vm, src2);
5087 for (int i = 0; i < 2; i++) {
5088 dst1[i * 2] = src1[i];
5089 dst1[i * 2 + 1] = src2[i];
5090 dst2[i * 2] = src1[i + 2];
5091 dst2[i * 2 + 1] = src2[i + 2];
5092 }
5093 set_q_register(Vd, dst1);
5094 set_q_register(Vm, dst2);
5095 break;
5096 }
5097 default:
5098 UNREACHABLE();
5099 break;
5100 } 5152 }
5101 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { 5153 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
5102 // vrev<op>.size Qd, Qm 5154 // vrev<op>.size Qd, Qm
5103 int Vd = instr->VFPDRegValue(kSimd128Precision); 5155 int Vd = instr->VFPDRegValue(kSimd128Precision);
5104 int Vm = instr->VFPMRegValue(kSimd128Precision); 5156 int Vm = instr->VFPMRegValue(kSimd128Precision);
5105 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5157 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5106 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - 5158 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
5107 instr->Bits(8, 7)); 5159 instr->Bits(8, 7));
5108 switch (op) { 5160 switch (op) {
5109 case Neon16: { 5161 case Neon16: {
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
5176 default: 5228 default:
5177 UNREACHABLE(); 5229 UNREACHABLE();
5178 break; 5230 break;
5179 } 5231 }
5180 break; 5232 break;
5181 } 5233 }
5182 default: 5234 default:
5183 UNREACHABLE(); 5235 UNREACHABLE();
5184 break; 5236 break;
5185 } 5237 }
5238 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
5239 int Vd = instr->VFPDRegValue(kSimd128Precision);
5240 int Vm = instr->VFPMRegValue(kSimd128Precision);
5241 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5242 // vtrn.<size> Qd, Qm.
5243 switch (size) {
5244 case Neon8: {
5245 uint8_t src[16], dst[16];
5246 get_q_register(Vd, dst);
5247 get_q_register(Vm, src);
5248 for (int i = 0; i < 8; i++) {
5249 std::swap(dst[2 * i + 1], src[2 * i]);
5250 }
5251 set_q_register(Vd, dst);
5252 set_q_register(Vm, src);
5253 break;
5254 }
5255 case Neon16: {
5256 uint16_t src[8], dst[8];
5257 get_q_register(Vd, dst);
5258 get_q_register(Vm, src);
5259 for (int i = 0; i < 4; i++) {
5260 std::swap(dst[2 * i + 1], src[2 * i]);
5261 }
5262 set_q_register(Vd, dst);
5263 set_q_register(Vm, src);
5264 break;
5265 }
5266 case Neon32: {
5267 uint32_t src[4], dst[4];
5268 get_q_register(Vd, dst);
5269 get_q_register(Vm, src);
5270 for (int i = 0; i < 2; i++) {
5271 std::swap(dst[2 * i + 1], src[2 * i]);
5272 }
5273 set_q_register(Vd, dst);
5274 set_q_register(Vm, src);
5275 break;
5276 }
5277 default:
5278 UNREACHABLE();
5279 break;
5280 }
5186 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { 5281 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
5187 int Vd = instr->VFPDRegValue(kSimd128Precision); 5282 int Vd = instr->VFPDRegValue(kSimd128Precision);
5188 int Vm = instr->VFPMRegValue(kSimd128Precision); 5283 int Vm = instr->VFPMRegValue(kSimd128Precision);
5189 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5284 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5190 if (instr->Bits(9, 6) == 0xd) { 5285 if (instr->Bits(9, 6) == 0xd) {
5191 // vabs<type>.<size> Qd, Qm 5286 // vabs<type>.<size> Qd, Qm
5192 if (instr->Bit(10) != 0) { 5287 if (instr->Bit(10) != 0) {
5193 // floating point (clear sign bits) 5288 // floating point (clear sign bits)
5194 uint32_t src[4]; 5289 uint32_t src[4];
5195 get_q_register(Vm, src); 5290 get_q_register(Vm, src);
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after
6059 processor->prev_ = nullptr; 6154 processor->prev_ = nullptr;
6060 processor->next_ = nullptr; 6155 processor->next_ = nullptr;
6061 } 6156 }
6062 6157
6063 } // namespace internal 6158 } // namespace internal
6064 } // namespace v8 6159 } // namespace v8
6065 6160
6066 #endif // USE_SIMULATOR 6161 #endif // USE_SIMULATOR
6067 6162
6068 #endif // V8_TARGET_ARCH_ARM 6163 #endif // V8_TARGET_ARCH_ARM
OLD | NEW
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698