Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(443)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2797923006: [ARM] Implement D-register versions of vzip, vuzp, and vtrn. (Closed)
Patch Set: Initial Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 5064 matching lines...) Expand 10 before | Expand all | Expand 10 after
5075 uint64_t table; 5075 uint64_t table;
5076 get_d_register(vn + index / kDoubleSize, &table); 5076 get_d_register(vn + index / kDoubleSize, &table);
5077 result |= 5077 result |=
5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) 5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
5079 << shift; 5079 << shift;
5080 } else if (vtbx) { 5080 } else if (vtbx) {
5081 result |= destination & (0xFFull << shift); 5081 result |= destination & (0xFFull << shift);
5082 } 5082 }
5083 } 5083 }
5084 set_d_register(vd, &result); 5084 set_d_register(vd, &result);
5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && 5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
5086 instr->Bit(6) == 1) {
5087 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5086 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5088 int Vd = instr->VFPDRegValue(kSimd128Precision); 5087 if (instr->Bit(6) == 0) {
5089 int Vm = instr->VFPMRegValue(kSimd128Precision); 5088 int Vd = instr->VFPDRegValue(kDoublePrecision);
5090 if (instr->Bit(7) == 1) { 5089 int Vm = instr->VFPMRegValue(kDoublePrecision);
5091 // vzip.<size> Qd, Qm. 5090 if (instr->Bit(7) == 1) {
5092 switch (size) { 5091 // vzip.<size> Dd, Dm.
5093 case Neon8: { 5092 switch (size) {
5094 uint8_t src1[16], src2[16], dst1[16], dst2[16]; 5093 case Neon8: {
martyn.capewell 2017/04/06 13:09:49 It would be good to factorise the zip and uzp oper
bbudge 2017/04/06 17:49:43 Done. I rewrote the get_ and set_q_register templ
5095 get_q_register(Vd, src1); 5094 uint8_t src1[8], src2[8], dst1[8], dst2[8];
5096 get_q_register(Vm, src2); 5095 get_d_register(Vd, src1);
5097 for (int i = 0; i < 8; i++) { 5096 get_d_register(Vm, src2);
5098 dst1[i * 2] = src1[i]; 5097 for (int i = 0; i < 4; i++) {
5099 dst1[i * 2 + 1] = src2[i]; 5098 dst1[i * 2] = src1[i];
5100 dst2[i * 2] = src1[i + 8]; 5099 dst1[i * 2 + 1] = src2[i];
5101 dst2[i * 2 + 1] = src2[i + 8]; 5100 dst2[i * 2] = src1[i + 4];
5102 } 5101 dst2[i * 2 + 1] = src2[i + 4];
5103 set_q_register(Vd, dst1); 5102 }
5104 set_q_register(Vm, dst2); 5103 set_d_register(Vd, dst1);
5105 break; 5104 set_d_register(Vm, dst2);
5106 } 5105 break;
5107 case Neon16: { 5106 }
5108 uint16_t src1[8], src2[8], dst1[8], dst2[8]; 5107 case Neon16: {
5109 get_q_register(Vd, src1); 5108 uint16_t src1[4], src2[4], dst1[4], dst2[4];
5110 get_q_register(Vm, src2); 5109 get_d_register(Vd, src1);
5111 for (int i = 0; i < 4; i++) { 5110 get_d_register(Vm, src2);
5112 dst1[i * 2] = src1[i]; 5111 for (int i = 0; i < 2; i++) {
5113 dst1[i * 2 + 1] = src2[i]; 5112 dst1[i * 2] = src1[i];
5114 dst2[i * 2] = src1[i + 4]; 5113 dst1[i * 2 + 1] = src2[i];
5115 dst2[i * 2 + 1] = src2[i + 4]; 5114 dst2[i * 2] = src1[i + 2];
5116 } 5115 dst2[i * 2 + 1] = src2[i + 2];
5117 set_q_register(Vd, dst1); 5116 }
5118 set_q_register(Vm, dst2); 5117 set_d_register(Vd, dst1);
5119 break; 5118 set_d_register(Vm, dst2);
5120 } 5119 break;
5121 case Neon32: { 5120 }
5122 uint32_t src1[4], src2[4], dst1[4], dst2[4]; 5121 case Neon32: {
5123 get_q_register(Vd, src1); 5122 uint32_t src1[2], src2[2], dst1[2], dst2[2];
5124 get_q_register(Vm, src2); 5123 get_d_register(Vd, src1);
5125 for (int i = 0; i < 2; i++) { 5124 get_d_register(Vm, src2);
5126 dst1[i * 2] = src1[i]; 5125 dst1[0] = src1[0];
5127 dst1[i * 2 + 1] = src2[i]; 5126 dst1[1] = src2[0];
5128 dst2[i * 2] = src1[i + 2]; 5127 dst2[0] = src1[1];
5129 dst2[i * 2 + 1] = src2[i + 2]; 5128 dst2[1] = src2[1];
5130 } 5129 set_d_register(Vd, dst1);
5131 set_q_register(Vd, dst1); 5130 set_d_register(Vm, dst2);
5132 set_q_register(Vm, dst2); 5131 break;
5133 break; 5132 }
5134 } 5133 default:
5135 default: 5134 UNREACHABLE();
5136 UNREACHABLE(); 5135 break;
5137 break; 5136 }
5137 } else {
5138 // vuzp.<size> Dd, Dm.
5139 switch (size) {
5140 case Neon8: {
5141 uint8_t src1[8], src2[8], dst1[8], dst2[8];
5142 get_d_register(Vd, src1);
5143 get_d_register(Vm, src2);
5144 for (int i = 0; i < 4; i++) {
5145 dst1[i] = src1[i * 2];
5146 dst1[i + 4] = src2[i * 2];
5147 dst2[i] = src1[i * 2 + 1];
5148 dst2[i + 4] = src2[i * 2 + 1];
5149 }
5150 set_d_register(Vd, dst1);
5151 set_d_register(Vm, dst2);
5152 break;
5153 }
5154 case Neon16: {
5155 uint16_t src1[4], src2[4], dst1[4], dst2[4];
5156 get_d_register(Vd, src1);
5157 get_d_register(Vm, src2);
5158 for (int i = 0; i < 2; i++) {
5159 dst1[i] = src1[i * 2];
5160 dst1[i + 2] = src2[i * 2];
5161 dst2[i] = src1[i * 2 + 1];
5162 dst2[i + 2] = src2[i * 2 + 1];
5163 }
5164 set_d_register(Vd, dst1);
5165 set_d_register(Vm, dst2);
5166 break;
5167 }
5168 case Neon32: {
5169 uint32_t src1[2], src2[2], dst1[2], dst2[2];
5170 get_d_register(Vd, src1);
5171 get_d_register(Vm, src2);
5172 dst1[0] = src1[0];
5173 dst1[1] = src2[0];
5174 dst2[0] = src1[1];
5175 dst2[1] = src2[1];
5176 set_d_register(Vd, dst1);
5177 set_d_register(Vm, dst2);
5178 break;
5179 }
5180 default:
5181 UNREACHABLE();
5182 break;
5183 }
5138 } 5184 }
5139 } else { 5185 } else {
5140 // vuzp.<size> Qd, Qm. 5186 int Vd = instr->VFPDRegValue(kSimd128Precision);
5141 switch (size) { 5187 int Vm = instr->VFPMRegValue(kSimd128Precision);
5142 case Neon8: { 5188 if (instr->Bit(7) == 1) {
5143 uint8_t src1[16], src2[16], dst1[16], dst2[16]; 5189 // vzip.<size> Qd, Qm.
5144 get_q_register(Vd, src1); 5190 switch (size) {
5145 get_q_register(Vm, src2); 5191 case Neon8: {
5146 for (int i = 0; i < 8; i++) { 5192 uint8_t src1[16], src2[16], dst1[16], dst2[16];
5147 dst1[i] = src1[i * 2]; 5193 get_q_register(Vd, src1);
5148 dst1[i + 8] = src2[i * 2]; 5194 get_q_register(Vm, src2);
5149 dst2[i] = src1[i * 2 + 1]; 5195 for (int i = 0; i < 8; i++) {
5150 dst2[i + 8] = src2[i * 2 + 1]; 5196 dst1[i * 2] = src1[i];
5151 } 5197 dst1[i * 2 + 1] = src2[i];
5152 set_q_register(Vd, dst1); 5198 dst2[i * 2] = src1[i + 8];
5153 set_q_register(Vm, dst2); 5199 dst2[i * 2 + 1] = src2[i + 8];
5154 break; 5200 }
5155 } 5201 set_q_register(Vd, dst1);
5156 case Neon16: { 5202 set_q_register(Vm, dst2);
5157 uint16_t src1[8], src2[8], dst1[8], dst2[8]; 5203 break;
5158 get_q_register(Vd, src1); 5204 }
5159 get_q_register(Vm, src2); 5205 case Neon16: {
5160 for (int i = 0; i < 4; i++) { 5206 uint16_t src1[8], src2[8], dst1[8], dst2[8];
5161 dst1[i] = src1[i * 2]; 5207 get_q_register(Vd, src1);
5162 dst1[i + 4] = src2[i * 2]; 5208 get_q_register(Vm, src2);
5163 dst2[i] = src1[i * 2 + 1]; 5209 for (int i = 0; i < 4; i++) {
5164 dst2[i + 4] = src2[i * 2 + 1]; 5210 dst1[i * 2] = src1[i];
5165 } 5211 dst1[i * 2 + 1] = src2[i];
5166 set_q_register(Vd, dst1); 5212 dst2[i * 2] = src1[i + 4];
5167 set_q_register(Vm, dst2); 5213 dst2[i * 2 + 1] = src2[i + 4];
5168 break; 5214 }
5169 } 5215 set_q_register(Vd, dst1);
5170 case Neon32: { 5216 set_q_register(Vm, dst2);
5171 uint32_t src1[4], src2[4], dst1[4], dst2[4]; 5217 break;
5172 get_q_register(Vd, src1); 5218 }
5173 get_q_register(Vm, src2); 5219 case Neon32: {
5174 for (int i = 0; i < 2; i++) { 5220 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5175 dst1[i] = src1[i * 2]; 5221 get_q_register(Vd, src1);
5176 dst1[i + 2] = src2[i * 2]; 5222 get_q_register(Vm, src2);
5177 dst2[i] = src1[i * 2 + 1]; 5223 for (int i = 0; i < 2; i++) {
5178 dst2[i + 2] = src2[i * 2 + 1]; 5224 dst1[i * 2] = src1[i];
5179 } 5225 dst1[i * 2 + 1] = src2[i];
5180 set_q_register(Vd, dst1); 5226 dst2[i * 2] = src1[i + 2];
5181 set_q_register(Vm, dst2); 5227 dst2[i * 2 + 1] = src2[i + 2];
5182 break; 5228 }
5183 } 5229 set_q_register(Vd, dst1);
5184 default: 5230 set_q_register(Vm, dst2);
5185 UNREACHABLE(); 5231 break;
5186 break; 5232 }
5233 default:
5234 UNREACHABLE();
5235 break;
5236 }
5237 } else {
5238 // vuzp.<size> Qd, Qm.
5239 switch (size) {
5240 case Neon8: {
5241 uint8_t src1[16], src2[16], dst1[16], dst2[16];
5242 get_q_register(Vd, src1);
5243 get_q_register(Vm, src2);
5244 for (int i = 0; i < 8; i++) {
5245 dst1[i] = src1[i * 2];
5246 dst1[i + 8] = src2[i * 2];
5247 dst2[i] = src1[i * 2 + 1];
5248 dst2[i + 8] = src2[i * 2 + 1];
5249 }
5250 set_q_register(Vd, dst1);
5251 set_q_register(Vm, dst2);
5252 break;
5253 }
5254 case Neon16: {
5255 uint16_t src1[8], src2[8], dst1[8], dst2[8];
5256 get_q_register(Vd, src1);
5257 get_q_register(Vm, src2);
5258 for (int i = 0; i < 4; i++) {
5259 dst1[i] = src1[i * 2];
5260 dst1[i + 4] = src2[i * 2];
5261 dst2[i] = src1[i * 2 + 1];
5262 dst2[i + 4] = src2[i * 2 + 1];
5263 }
5264 set_q_register(Vd, dst1);
5265 set_q_register(Vm, dst2);
5266 break;
5267 }
5268 case Neon32: {
5269 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5270 get_q_register(Vd, src1);
5271 get_q_register(Vm, src2);
5272 for (int i = 0; i < 2; i++) {
5273 dst1[i] = src1[i * 2];
5274 dst1[i + 2] = src2[i * 2];
5275 dst2[i] = src1[i * 2 + 1];
5276 dst2[i + 2] = src2[i * 2 + 1];
5277 }
5278 set_q_register(Vd, dst1);
5279 set_q_register(Vm, dst2);
5280 break;
5281 }
5282 default:
5283 UNREACHABLE();
5284 break;
5285 }
5187 } 5286 }
5188 } 5287 }
5189 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { 5288 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
5190 // vrev<op>.size Qd, Qm 5289 // vrev<op>.size Qd, Qm
5191 int Vd = instr->VFPDRegValue(kSimd128Precision); 5290 int Vd = instr->VFPDRegValue(kSimd128Precision);
5192 int Vm = instr->VFPMRegValue(kSimd128Precision); 5291 int Vm = instr->VFPMRegValue(kSimd128Precision);
5193 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5292 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5194 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - 5293 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
5195 instr->Bits(8, 7)); 5294 instr->Bits(8, 7));
5196 switch (op) { 5295 switch (op) {
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
5264 default: 5363 default:
5265 UNREACHABLE(); 5364 UNREACHABLE();
5266 break; 5365 break;
5267 } 5366 }
5268 break; 5367 break;
5269 } 5368 }
5270 default: 5369 default:
5271 UNREACHABLE(); 5370 UNREACHABLE();
5272 break; 5371 break;
5273 } 5372 }
5274 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { 5373 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
5275 int Vd = instr->VFPDRegValue(kSimd128Precision);
5276 int Vm = instr->VFPMRegValue(kSimd128Precision);
5277 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5374 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5278 // vtrn.<size> Qd, Qm. 5375 if (instr->Bit(6) == 0) {
5279 switch (size) { 5376 int Vd = instr->VFPDRegValue(kDoublePrecision);
5280 case Neon8: { 5377 int Vm = instr->VFPMRegValue(kDoublePrecision);
5281 uint8_t src[16], dst[16]; 5378 // vtrn.<size> Dd, Dm.
5282 get_q_register(Vd, dst); 5379 switch (size) {
5283 get_q_register(Vm, src); 5380 case Neon8: {
5284 for (int i = 0; i < 8; i++) { 5381 uint8_t src[8], dst[8];
5285 std::swap(dst[2 * i + 1], src[2 * i]); 5382 get_d_register(Vd, dst);
5383 get_d_register(Vm, src);
5384 for (int i = 0; i < 4; i++) {
5385 std::swap(dst[2 * i + 1], src[2 * i]);
5386 }
5387 set_d_register(Vd, dst);
5388 set_d_register(Vm, src);
5389 break;
5286 } 5390 }
5287 set_q_register(Vd, dst); 5391 case Neon16: {
5288 set_q_register(Vm, src); 5392 uint16_t src[4], dst[4];
5289 break; 5393 get_d_register(Vd, dst);
5394 get_d_register(Vm, src);
5395 for (int i = 0; i < 2; i++) {
5396 std::swap(dst[2 * i + 1], src[2 * i]);
5397 }
5398 set_d_register(Vd, dst);
5399 set_d_register(Vm, src);
5400 break;
5401 }
5402 case Neon32: {
5403 uint32_t src[2], dst[2];
5404 get_d_register(Vd, dst);
5405 get_d_register(Vm, src);
5406 std::swap(dst[1], src[0]);
5407 set_d_register(Vd, dst);
5408 set_d_register(Vm, src);
5409 break;
5410 }
5411 default:
5412 UNREACHABLE();
5413 break;
5290 } 5414 }
5291 case Neon16: { 5415 } else {
5292 uint16_t src[8], dst[8]; 5416 int Vd = instr->VFPDRegValue(kSimd128Precision);
5293 get_q_register(Vd, dst); 5417 int Vm = instr->VFPMRegValue(kSimd128Precision);
5294 get_q_register(Vm, src); 5418 // vtrn.<size> Qd, Qm.
5295 for (int i = 0; i < 4; i++) { 5419 switch (size) {
5296 std::swap(dst[2 * i + 1], src[2 * i]); 5420 case Neon8: {
5421 uint8_t src[16], dst[16];
5422 get_q_register(Vd, dst);
5423 get_q_register(Vm, src);
5424 for (int i = 0; i < 8; i++) {
5425 std::swap(dst[2 * i + 1], src[2 * i]);
5426 }
5427 set_q_register(Vd, dst);
5428 set_q_register(Vm, src);
5429 break;
5297 } 5430 }
5298 set_q_register(Vd, dst); 5431 case Neon16: {
5299 set_q_register(Vm, src); 5432 uint16_t src[8], dst[8];
5300 break; 5433 get_q_register(Vd, dst);
5434 get_q_register(Vm, src);
5435 for (int i = 0; i < 4; i++) {
5436 std::swap(dst[2 * i + 1], src[2 * i]);
5437 }
5438 set_q_register(Vd, dst);
5439 set_q_register(Vm, src);
5440 break;
5441 }
5442 case Neon32: {
5443 uint32_t src[4], dst[4];
5444 get_q_register(Vd, dst);
5445 get_q_register(Vm, src);
5446 for (int i = 0; i < 2; i++) {
5447 std::swap(dst[2 * i + 1], src[2 * i]);
5448 }
5449 set_q_register(Vd, dst);
5450 set_q_register(Vm, src);
5451 break;
5452 }
5453 default:
5454 UNREACHABLE();
5455 break;
5301 } 5456 }
5302 case Neon32: {
5303 uint32_t src[4], dst[4];
5304 get_q_register(Vd, dst);
5305 get_q_register(Vm, src);
5306 for (int i = 0; i < 2; i++) {
5307 std::swap(dst[2 * i + 1], src[2 * i]);
5308 }
5309 set_q_register(Vd, dst);
5310 set_q_register(Vm, src);
5311 break;
5312 }
5313 default:
5314 UNREACHABLE();
5315 break;
5316 } 5457 }
5317 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { 5458 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
5318 int Vd = instr->VFPDRegValue(kSimd128Precision); 5459 int Vd = instr->VFPDRegValue(kSimd128Precision);
5319 int Vm = instr->VFPMRegValue(kSimd128Precision); 5460 int Vm = instr->VFPMRegValue(kSimd128Precision);
5320 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5461 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5321 if (instr->Bits(9, 6) == 0xd) { 5462 if (instr->Bits(9, 6) == 0xd) {
5322 // vabs<type>.<size> Qd, Qm 5463 // vabs<type>.<size> Qd, Qm
5323 if (instr->Bit(10) != 0) { 5464 if (instr->Bit(10) != 0) {
5324 // floating point (clear sign bits) 5465 // floating point (clear sign bits)
5325 uint32_t src[4]; 5466 uint32_t src[4];
(...skipping 896 matching lines...) Expand 10 before | Expand all | Expand 10 after
6222 processor->prev_ = nullptr; 6363 processor->prev_ = nullptr;
6223 processor->next_ = nullptr; 6364 processor->next_ = nullptr;
6224 } 6365 }
6225 6366
6226 } // namespace internal 6367 } // namespace internal
6227 } // namespace v8 6368 } // namespace v8
6228 6369
6229 #endif // USE_SIMULATOR 6370 #endif // USE_SIMULATOR
6230 6371
6231 #endif // V8_TARGET_ARCH_ARM 6372 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698