OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <stdarg.h> | 5 #include <stdarg.h> |
6 #include <stdlib.h> | 6 #include <stdlib.h> |
7 #include <cmath> | 7 #include <cmath> |
8 | 8 |
9 #if V8_TARGET_ARCH_ARM | 9 #if V8_TARGET_ARCH_ARM |
10 | 10 |
(...skipping 5064 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5075 uint64_t table; | 5075 uint64_t table; |
5076 get_d_register(vn + index / kDoubleSize, &table); | 5076 get_d_register(vn + index / kDoubleSize, &table); |
5077 result |= | 5077 result |= |
5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) | 5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
5079 << shift; | 5079 << shift; |
5080 } else if (vtbx) { | 5080 } else if (vtbx) { |
5081 result |= destination & (0xFFull << shift); | 5081 result |= destination & (0xFFull << shift); |
5082 } | 5082 } |
5083 } | 5083 } |
5084 set_d_register(vd, &result); | 5084 set_d_register(vd, &result); |
5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && | 5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) { |
5086 instr->Bit(6) == 1) { | |
5087 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5086 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5088 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5087 if (instr->Bit(6) == 0) { |
5089 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5088 int Vd = instr->VFPDRegValue(kDoublePrecision); |
5090 if (instr->Bit(7) == 1) { | 5089 int Vm = instr->VFPMRegValue(kDoublePrecision); |
5091 // vzip.<size> Qd, Qm. | 5090 if (instr->Bit(7) == 1) { |
5092 switch (size) { | 5091 // vzip.<size> Dd, Dm. |
5093 case Neon8: { | 5092 switch (size) { |
5094 uint8_t src1[16], src2[16], dst1[16], dst2[16]; | 5093 case Neon8: { |
martyn.capewell
2017/04/06 13:09:49
It would be good to factorise the zip and uzp oper… [review comment truncated in this capture]
bbudge
2017/04/06 17:49:43
Done.
I rewrote the get_ and set_q_register templ… [review comment truncated in this capture]
| |
5095 get_q_register(Vd, src1); | 5094 uint8_t src1[8], src2[8], dst1[8], dst2[8]; |
5096 get_q_register(Vm, src2); | 5095 get_d_register(Vd, src1); |
5097 for (int i = 0; i < 8; i++) { | 5096 get_d_register(Vm, src2); |
5098 dst1[i * 2] = src1[i]; | 5097 for (int i = 0; i < 4; i++) { |
5099 dst1[i * 2 + 1] = src2[i]; | 5098 dst1[i * 2] = src1[i]; |
5100 dst2[i * 2] = src1[i + 8]; | 5099 dst1[i * 2 + 1] = src2[i]; |
5101 dst2[i * 2 + 1] = src2[i + 8]; | 5100 dst2[i * 2] = src1[i + 4]; |
5102 } | 5101 dst2[i * 2 + 1] = src2[i + 4]; |
5103 set_q_register(Vd, dst1); | 5102 } |
5104 set_q_register(Vm, dst2); | 5103 set_d_register(Vd, dst1); |
5105 break; | 5104 set_d_register(Vm, dst2); |
5106 } | 5105 break; |
5107 case Neon16: { | 5106 } |
5108 uint16_t src1[8], src2[8], dst1[8], dst2[8]; | 5107 case Neon16: { |
5109 get_q_register(Vd, src1); | 5108 uint16_t src1[4], src2[4], dst1[4], dst2[4]; |
5110 get_q_register(Vm, src2); | 5109 get_d_register(Vd, src1); |
5111 for (int i = 0; i < 4; i++) { | 5110 get_d_register(Vm, src2); |
5112 dst1[i * 2] = src1[i]; | 5111 for (int i = 0; i < 2; i++) { |
5113 dst1[i * 2 + 1] = src2[i]; | 5112 dst1[i * 2] = src1[i]; |
5114 dst2[i * 2] = src1[i + 4]; | 5113 dst1[i * 2 + 1] = src2[i]; |
5115 dst2[i * 2 + 1] = src2[i + 4]; | 5114 dst2[i * 2] = src1[i + 2]; |
5116 } | 5115 dst2[i * 2 + 1] = src2[i + 2]; |
5117 set_q_register(Vd, dst1); | 5116 } |
5118 set_q_register(Vm, dst2); | 5117 set_d_register(Vd, dst1); |
5119 break; | 5118 set_d_register(Vm, dst2); |
5120 } | 5119 break; |
5121 case Neon32: { | 5120 } |
5122 uint32_t src1[4], src2[4], dst1[4], dst2[4]; | 5121 case Neon32: { |
5123 get_q_register(Vd, src1); | 5122 uint32_t src1[2], src2[2], dst1[2], dst2[2]; |
5124 get_q_register(Vm, src2); | 5123 get_d_register(Vd, src1); |
5125 for (int i = 0; i < 2; i++) { | 5124 get_d_register(Vm, src2); |
5126 dst1[i * 2] = src1[i]; | 5125 dst1[0] = src1[0]; |
5127 dst1[i * 2 + 1] = src2[i]; | 5126 dst1[1] = src2[0]; |
5128 dst2[i * 2] = src1[i + 2]; | 5127 dst2[0] = src1[1]; |
5129 dst2[i * 2 + 1] = src2[i + 2]; | 5128 dst2[1] = src2[1]; |
5130 } | 5129 set_d_register(Vd, dst1); |
5131 set_q_register(Vd, dst1); | 5130 set_d_register(Vm, dst2); |
5132 set_q_register(Vm, dst2); | 5131 break; |
5133 break; | 5132 } |
5134 } | 5133 default: |
5135 default: | 5134 UNREACHABLE(); |
5136 UNREACHABLE(); | 5135 break; |
5137 break; | 5136 } |
5137 } else { | |
5138 // vuzp.<size> Dd, Dm. | |
5139 switch (size) { | |
5140 case Neon8: { | |
5141 uint8_t src1[8], src2[8], dst1[8], dst2[8]; | |
5142 get_d_register(Vd, src1); | |
5143 get_d_register(Vm, src2); | |
5144 for (int i = 0; i < 4; i++) { | |
5145 dst1[i] = src1[i * 2]; | |
5146 dst1[i + 4] = src2[i * 2]; | |
5147 dst2[i] = src1[i * 2 + 1]; | |
5148 dst2[i + 4] = src2[i * 2 + 1]; | |
5149 } | |
5150 set_d_register(Vd, dst1); | |
5151 set_d_register(Vm, dst2); | |
5152 break; | |
5153 } | |
5154 case Neon16: { | |
5155 uint16_t src1[4], src2[4], dst1[4], dst2[4]; | |
5156 get_d_register(Vd, src1); | |
5157 get_d_register(Vm, src2); | |
5158 for (int i = 0; i < 2; i++) { | |
5159 dst1[i] = src1[i * 2]; | |
5160 dst1[i + 2] = src2[i * 2]; | |
5161 dst2[i] = src1[i * 2 + 1]; | |
5162 dst2[i + 2] = src2[i * 2 + 1]; | |
5163 } | |
5164 set_d_register(Vd, dst1); | |
5165 set_d_register(Vm, dst2); | |
5166 break; | |
5167 } | |
5168 case Neon32: { | |
5169 uint32_t src1[2], src2[2], dst1[2], dst2[2]; | |
5170 get_d_register(Vd, src1); | |
5171 get_d_register(Vm, src2); | |
5172 dst1[0] = src1[0]; | |
5173 dst1[1] = src2[0]; | |
5174 dst2[0] = src1[1]; | |
5175 dst2[1] = src2[1]; | |
5176 set_d_register(Vd, dst1); | |
5177 set_d_register(Vm, dst2); | |
5178 break; | |
5179 } | |
5180 default: | |
5181 UNREACHABLE(); | |
5182 break; | |
5183 } | |
5138 } | 5184 } |
5139 } else { | 5185 } else { |
5140 // vuzp.<size> Qd, Qm. | 5186 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5141 switch (size) { | 5187 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5142 case Neon8: { | 5188 if (instr->Bit(7) == 1) { |
5143 uint8_t src1[16], src2[16], dst1[16], dst2[16]; | 5189 // vzip.<size> Qd, Qm. |
5144 get_q_register(Vd, src1); | 5190 switch (size) { |
5145 get_q_register(Vm, src2); | 5191 case Neon8: { |
5146 for (int i = 0; i < 8; i++) { | 5192 uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
5147 dst1[i] = src1[i * 2]; | 5193 get_q_register(Vd, src1); |
5148 dst1[i + 8] = src2[i * 2]; | 5194 get_q_register(Vm, src2); |
5149 dst2[i] = src1[i * 2 + 1]; | 5195 for (int i = 0; i < 8; i++) { |
5150 dst2[i + 8] = src2[i * 2 + 1]; | 5196 dst1[i * 2] = src1[i]; |
5151 } | 5197 dst1[i * 2 + 1] = src2[i]; |
5152 set_q_register(Vd, dst1); | 5198 dst2[i * 2] = src1[i + 8]; |
5153 set_q_register(Vm, dst2); | 5199 dst2[i * 2 + 1] = src2[i + 8]; |
5154 break; | 5200 } |
5155 } | 5201 set_q_register(Vd, dst1); |
5156 case Neon16: { | 5202 set_q_register(Vm, dst2); |
5157 uint16_t src1[8], src2[8], dst1[8], dst2[8]; | 5203 break; |
5158 get_q_register(Vd, src1); | 5204 } |
5159 get_q_register(Vm, src2); | 5205 case Neon16: { |
5160 for (int i = 0; i < 4; i++) { | 5206 uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
5161 dst1[i] = src1[i * 2]; | 5207 get_q_register(Vd, src1); |
5162 dst1[i + 4] = src2[i * 2]; | 5208 get_q_register(Vm, src2); |
5163 dst2[i] = src1[i * 2 + 1]; | 5209 for (int i = 0; i < 4; i++) { |
5164 dst2[i + 4] = src2[i * 2 + 1]; | 5210 dst1[i * 2] = src1[i]; |
5165 } | 5211 dst1[i * 2 + 1] = src2[i]; |
5166 set_q_register(Vd, dst1); | 5212 dst2[i * 2] = src1[i + 4]; |
5167 set_q_register(Vm, dst2); | 5213 dst2[i * 2 + 1] = src2[i + 4]; |
5168 break; | 5214 } |
5169 } | 5215 set_q_register(Vd, dst1); |
5170 case Neon32: { | 5216 set_q_register(Vm, dst2); |
5171 uint32_t src1[4], src2[4], dst1[4], dst2[4]; | 5217 break; |
5172 get_q_register(Vd, src1); | 5218 } |
5173 get_q_register(Vm, src2); | 5219 case Neon32: { |
5174 for (int i = 0; i < 2; i++) { | 5220 uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
5175 dst1[i] = src1[i * 2]; | 5221 get_q_register(Vd, src1); |
5176 dst1[i + 2] = src2[i * 2]; | 5222 get_q_register(Vm, src2); |
5177 dst2[i] = src1[i * 2 + 1]; | 5223 for (int i = 0; i < 2; i++) { |
5178 dst2[i + 2] = src2[i * 2 + 1]; | 5224 dst1[i * 2] = src1[i]; |
5179 } | 5225 dst1[i * 2 + 1] = src2[i]; |
5180 set_q_register(Vd, dst1); | 5226 dst2[i * 2] = src1[i + 2]; |
5181 set_q_register(Vm, dst2); | 5227 dst2[i * 2 + 1] = src2[i + 2]; |
5182 break; | 5228 } |
5183 } | 5229 set_q_register(Vd, dst1); |
5184 default: | 5230 set_q_register(Vm, dst2); |
5185 UNREACHABLE(); | 5231 break; |
5186 break; | 5232 } |
5233 default: | |
5234 UNREACHABLE(); | |
5235 break; | |
5236 } | |
5237 } else { | |
5238 // vuzp.<size> Qd, Qm. | |
5239 switch (size) { | |
5240 case Neon8: { | |
5241 uint8_t src1[16], src2[16], dst1[16], dst2[16]; | |
5242 get_q_register(Vd, src1); | |
5243 get_q_register(Vm, src2); | |
5244 for (int i = 0; i < 8; i++) { | |
5245 dst1[i] = src1[i * 2]; | |
5246 dst1[i + 8] = src2[i * 2]; | |
5247 dst2[i] = src1[i * 2 + 1]; | |
5248 dst2[i + 8] = src2[i * 2 + 1]; | |
5249 } | |
5250 set_q_register(Vd, dst1); | |
5251 set_q_register(Vm, dst2); | |
5252 break; | |
5253 } | |
5254 case Neon16: { | |
5255 uint16_t src1[8], src2[8], dst1[8], dst2[8]; | |
5256 get_q_register(Vd, src1); | |
5257 get_q_register(Vm, src2); | |
5258 for (int i = 0; i < 4; i++) { | |
5259 dst1[i] = src1[i * 2]; | |
5260 dst1[i + 4] = src2[i * 2]; | |
5261 dst2[i] = src1[i * 2 + 1]; | |
5262 dst2[i + 4] = src2[i * 2 + 1]; | |
5263 } | |
5264 set_q_register(Vd, dst1); | |
5265 set_q_register(Vm, dst2); | |
5266 break; | |
5267 } | |
5268 case Neon32: { | |
5269 uint32_t src1[4], src2[4], dst1[4], dst2[4]; | |
5270 get_q_register(Vd, src1); | |
5271 get_q_register(Vm, src2); | |
5272 for (int i = 0; i < 2; i++) { | |
5273 dst1[i] = src1[i * 2]; | |
5274 dst1[i + 2] = src2[i * 2]; | |
5275 dst2[i] = src1[i * 2 + 1]; | |
5276 dst2[i + 2] = src2[i * 2 + 1]; | |
5277 } | |
5278 set_q_register(Vd, dst1); | |
5279 set_q_register(Vm, dst2); | |
5280 break; | |
5281 } | |
5282 default: | |
5283 UNREACHABLE(); | |
5284 break; | |
5285 } | |
5187 } | 5286 } |
5188 } | 5287 } |
5189 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { | 5288 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
5190 // vrev<op>.size Qd, Qm | 5289 // vrev<op>.size Qd, Qm |
5191 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5290 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5192 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5291 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5193 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5292 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5194 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - | 5293 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - |
5195 instr->Bits(8, 7)); | 5294 instr->Bits(8, 7)); |
5196 switch (op) { | 5295 switch (op) { |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5264 default: | 5363 default: |
5265 UNREACHABLE(); | 5364 UNREACHABLE(); |
5266 break; | 5365 break; |
5267 } | 5366 } |
5268 break; | 5367 break; |
5269 } | 5368 } |
5270 default: | 5369 default: |
5271 UNREACHABLE(); | 5370 UNREACHABLE(); |
5272 break; | 5371 break; |
5273 } | 5372 } |
5274 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { | 5373 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) { |
5275 int Vd = instr->VFPDRegValue(kSimd128Precision); | |
5276 int Vm = instr->VFPMRegValue(kSimd128Precision); | |
5277 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5374 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5278 // vtrn.<size> Qd, Qm. | 5375 if (instr->Bit(6) == 0) { |
5279 switch (size) { | 5376 int Vd = instr->VFPDRegValue(kDoublePrecision); |
5280 case Neon8: { | 5377 int Vm = instr->VFPMRegValue(kDoublePrecision); |
5281 uint8_t src[16], dst[16]; | 5378 // vtrn.<size> Dd, Dm. |
5282 get_q_register(Vd, dst); | 5379 switch (size) { |
5283 get_q_register(Vm, src); | 5380 case Neon8: { |
5284 for (int i = 0; i < 8; i++) { | 5381 uint8_t src[8], dst[8]; |
5285 std::swap(dst[2 * i + 1], src[2 * i]); | 5382 get_d_register(Vd, dst); |
5383 get_d_register(Vm, src); | |
5384 for (int i = 0; i < 4; i++) { | |
5385 std::swap(dst[2 * i + 1], src[2 * i]); | |
5386 } | |
5387 set_d_register(Vd, dst); | |
5388 set_d_register(Vm, src); | |
5389 break; | |
5286 } | 5390 } |
5287 set_q_register(Vd, dst); | 5391 case Neon16: { |
5288 set_q_register(Vm, src); | 5392 uint16_t src[4], dst[4]; |
5289 break; | 5393 get_d_register(Vd, dst); |
5394 get_d_register(Vm, src); | |
5395 for (int i = 0; i < 2; i++) { | |
5396 std::swap(dst[2 * i + 1], src[2 * i]); | |
5397 } | |
5398 set_d_register(Vd, dst); | |
5399 set_d_register(Vm, src); | |
5400 break; | |
5401 } | |
5402 case Neon32: { | |
5403 uint32_t src[2], dst[2]; | |
5404 get_d_register(Vd, dst); | |
5405 get_d_register(Vm, src); | |
5406 std::swap(dst[1], src[0]); | |
5407 set_d_register(Vd, dst); | |
5408 set_d_register(Vm, src); | |
5409 break; | |
5410 } | |
5411 default: | |
5412 UNREACHABLE(); | |
5413 break; | |
5290 } | 5414 } |
5291 case Neon16: { | 5415 } else { |
5292 uint16_t src[8], dst[8]; | 5416 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5293 get_q_register(Vd, dst); | 5417 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5294 get_q_register(Vm, src); | 5418 // vtrn.<size> Qd, Qm. |
5295 for (int i = 0; i < 4; i++) { | 5419 switch (size) { |
5296 std::swap(dst[2 * i + 1], src[2 * i]); | 5420 case Neon8: { |
5421 uint8_t src[16], dst[16]; | |
5422 get_q_register(Vd, dst); | |
5423 get_q_register(Vm, src); | |
5424 for (int i = 0; i < 8; i++) { | |
5425 std::swap(dst[2 * i + 1], src[2 * i]); | |
5426 } | |
5427 set_q_register(Vd, dst); | |
5428 set_q_register(Vm, src); | |
5429 break; | |
5297 } | 5430 } |
5298 set_q_register(Vd, dst); | 5431 case Neon16: { |
5299 set_q_register(Vm, src); | 5432 uint16_t src[8], dst[8]; |
5300 break; | 5433 get_q_register(Vd, dst); |
5434 get_q_register(Vm, src); | |
5435 for (int i = 0; i < 4; i++) { | |
5436 std::swap(dst[2 * i + 1], src[2 * i]); | |
5437 } | |
5438 set_q_register(Vd, dst); | |
5439 set_q_register(Vm, src); | |
5440 break; | |
5441 } | |
5442 case Neon32: { | |
5443 uint32_t src[4], dst[4]; | |
5444 get_q_register(Vd, dst); | |
5445 get_q_register(Vm, src); | |
5446 for (int i = 0; i < 2; i++) { | |
5447 std::swap(dst[2 * i + 1], src[2 * i]); | |
5448 } | |
5449 set_q_register(Vd, dst); | |
5450 set_q_register(Vm, src); | |
5451 break; | |
5452 } | |
5453 default: | |
5454 UNREACHABLE(); | |
5455 break; | |
5301 } | 5456 } |
5302 case Neon32: { | |
5303 uint32_t src[4], dst[4]; | |
5304 get_q_register(Vd, dst); | |
5305 get_q_register(Vm, src); | |
5306 for (int i = 0; i < 2; i++) { | |
5307 std::swap(dst[2 * i + 1], src[2 * i]); | |
5308 } | |
5309 set_q_register(Vd, dst); | |
5310 set_q_register(Vm, src); | |
5311 break; | |
5312 } | |
5313 default: | |
5314 UNREACHABLE(); | |
5315 break; | |
5316 } | 5457 } |
5317 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { | 5458 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
5318 int Vd = instr->VFPDRegValue(kSimd128Precision); | 5459 int Vd = instr->VFPDRegValue(kSimd128Precision); |
5319 int Vm = instr->VFPMRegValue(kSimd128Precision); | 5460 int Vm = instr->VFPMRegValue(kSimd128Precision); |
5320 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); | 5461 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
5321 if (instr->Bits(9, 6) == 0xd) { | 5462 if (instr->Bits(9, 6) == 0xd) { |
5322 // vabs<type>.<size> Qd, Qm | 5463 // vabs<type>.<size> Qd, Qm |
5323 if (instr->Bit(10) != 0) { | 5464 if (instr->Bit(10) != 0) { |
5324 // floating point (clear sign bits) | 5465 // floating point (clear sign bits) |
5325 uint32_t src[4]; | 5466 uint32_t src[4]; |
(...skipping 896 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6222 processor->prev_ = nullptr; | 6363 processor->prev_ = nullptr; |
6223 processor->next_ = nullptr; | 6364 processor->next_ = nullptr; |
6224 } | 6365 } |
6225 | 6366 |
6226 } // namespace internal | 6367 } // namespace internal |
6227 } // namespace v8 | 6368 } // namespace v8 |
6228 | 6369 |
6229 #endif // USE_SIMULATOR | 6370 #endif // USE_SIMULATOR |
6230 | 6371 |
6231 #endif // V8_TARGET_ARCH_ARM | 6372 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |