Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(274)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2546933002: [Turbofan] Add ARM NEON instructions for implementing SIMD. (Closed)
Patch Set: Third review comments. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 3049 matching lines...) Expand 10 before | Expand all | Expand 10 after
3060 // Sd = vsub(Sn, Sm) 3060 // Sd = vsub(Sn, Sm)
3061 // Dd = vmul(Dn, Dm) 3061 // Dd = vmul(Dn, Dm)
3062 // Sd = vmul(Sn, Sm) 3062 // Sd = vmul(Sn, Sm)
3063 // Dd = vdiv(Dn, Dm) 3063 // Dd = vdiv(Dn, Dm)
3064 // Sd = vdiv(Sn, Sm) 3064 // Sd = vdiv(Sn, Sm)
3065 // vcmp(Dd, Dm) 3065 // vcmp(Dd, Dm)
3066 // vcmp(Sd, Sm) 3066 // vcmp(Sd, Sm)
3067 // Dd = vsqrt(Dm) 3067 // Dd = vsqrt(Dm)
3068 // Sd = vsqrt(Sm) 3068 // Sd = vsqrt(Sm)
3069 // vmrs 3069 // vmrs
3070 // vdup.size Qd, Rt.
3070 void Simulator::DecodeTypeVFP(Instruction* instr) { 3071 void Simulator::DecodeTypeVFP(Instruction* instr) {
3071 DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) ); 3072 DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) );
3072 DCHECK(instr->Bits(11, 9) == 0x5); 3073 DCHECK(instr->Bits(11, 9) == 0x5);
3073 3074
3074 // Obtain single precision register codes. 3075 // Obtain single precision register codes.
3075 int m = instr->VFPMRegValue(kSinglePrecision); 3076 int m = instr->VFPMRegValue(kSinglePrecision);
3076 int d = instr->VFPDRegValue(kSinglePrecision); 3077 int d = instr->VFPDRegValue(kSinglePrecision);
3077 int n = instr->VFPNRegValue(kSinglePrecision); 3078 int n = instr->VFPNRegValue(kSinglePrecision);
3078 // Obtain double precision register codes. 3079 // Obtain double precision register codes.
3079 int vm = instr->VFPMRegValue(kDoublePrecision); 3080 int vm = instr->VFPMRegValue(kDoublePrecision);
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
3270 sd_value = canonicalizeNaN(sd_value); 3271 sd_value = canonicalizeNaN(sd_value);
3271 set_s_register_from_float(d, sd_value); 3272 set_s_register_from_float(d, sd_value);
3272 } 3273 }
3273 } else { 3274 } else {
3274 UNIMPLEMENTED(); // Not used by V8. 3275 UNIMPLEMENTED(); // Not used by V8.
3275 } 3276 }
3276 } else { 3277 } else {
3277 if ((instr->VCValue() == 0x0) && 3278 if ((instr->VCValue() == 0x0) &&
3278 (instr->VAValue() == 0x0)) { 3279 (instr->VAValue() == 0x0)) {
3279 DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr); 3280 DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
3280 } else if ((instr->VLValue() == 0x0) && 3281 } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1)) {
3281 (instr->VCValue() == 0x1) && 3282 if (instr->Bit(23) == 0) {
3282 (instr->Bit(23) == 0x0)) { 3283 // vmov (ARM core register to scalar)
3283 // vmov (ARM core register to scalar) 3284 int vd = instr->VFPNRegValue(kDoublePrecision);
3284 int vd = instr->Bits(19, 16) | (instr->Bit(7) << 4); 3285 int rt = instr->RtValue();
3285 uint32_t data[2]; 3286 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
3286 get_d_register(vd, data); 3287 if ((opc1_opc2 & 0xb) == 0) {
3287 data[instr->Bit(21)] = get_register(instr->RtValue()); 3288 // NeonS32/NeonU32
3288 set_d_register(vd, data); 3289 uint32_t data[2];
3289 } else if ((instr->VLValue() == 0x1) && 3290 get_d_register(vd, data);
3290 (instr->VCValue() == 0x1) && 3291 data[instr->Bit(21)] = get_register(rt);
3291 (instr->Bit(23) == 0x0)) { 3292 set_d_register(vd, data);
3293 } else {
3294 uint64_t data;
3295 get_d_register(vd, &data);
3296 uint64_t rt_value = get_register(rt);
3297 if ((opc1_opc2 & 0x8) != 0) {
3298 // NeonS8 / NeonU8
3299 int i = opc1_opc2 & 0x7;
3300 int shift = i * kBitsPerByte;
3301 const uint64_t mask = 0xFF;
3302 data &= ~(mask << shift);
3303 data |= (rt_value & mask) << shift;
3304 set_d_register(vd, &data);
3305 } else if ((opc1_opc2 & 0x1) != 0) {
3306 // NeonS16 / NeonU16
3307 int i = (opc1_opc2 >> 1) & 0x3;
3308 int shift = i * kBitsPerByte * kShortSize;
3309 const uint64_t mask = 0xFFFF;
3310 data &= ~(mask << shift);
3311 data |= (rt_value & mask) << shift;
3312 set_d_register(vd, &data);
3313 } else {
3314 UNREACHABLE(); // Not used by V8.
3315 }
3316 }
3317 } else {
3318 // vdup.size Qd, Rt.
3319 NeonSize size = Neon32;
3320 if (instr->Bit(5) != 0)
3321 size = Neon16;
3322 else if (instr->Bit(22) != 0)
3323 size = Neon8;
3324 int vd = instr->VFPNRegValue(kSimd128Precision);
3325 int rt = instr->RtValue();
3326 uint32_t rt_value = get_register(rt);
3327 uint32_t q_data[4];
3328 switch (size) {
3329 case Neon8: {
3330 rt_value &= 0xFF;
3331 uint8_t* dst = reinterpret_cast<uint8_t*>(q_data);
3332 for (int i = 0; i < 16; i++) {
3333 dst[i] = rt_value;
3334 }
3335 break;
3336 }
3337 case Neon16: {
3338 // Perform pairwise ops instead of casting to uint16_t.
3339 rt_value &= 0xFFFFu;
3340 uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu);
3341 for (int i = 0; i < 4; i++) {
3342 q_data[i] = rt_rt;
3343 }
3344 break;
3345 }
3346 case Neon32: {
3347 for (int i = 0; i < 4; i++) {
3348 q_data[i] = rt_value;
3349 }
3350 break;
3351 }
3352 default:
3353 UNREACHABLE();
3354 break;
3355 }
3356 set_q_register(vd, q_data);
3357 }
3358 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
3292 // vmov (scalar to ARM core register) 3359 // vmov (scalar to ARM core register)
3293 int vn = instr->Bits(19, 16) | (instr->Bit(7) << 4); 3360 int vn = instr->VFPNRegValue(kDoublePrecision);
3294 double dn_value = get_double_from_d_register(vn); 3361 int rt = instr->RtValue();
3295 int32_t data[2]; 3362 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
3296 memcpy(data, &dn_value, 8); 3363 if ((opc1_opc2 & 0xb) == 0) {
3297 set_register(instr->RtValue(), data[instr->Bit(21)]); 3364 // NeonS32 / NeonU32
3365 double dn_value = get_double_from_d_register(vn);
3366 int32_t data[2];
3367 memcpy(data, &dn_value, 8);
3368 set_register(rt, data[instr->Bit(21)]);
3369 } else {
3370 uint64_t data;
3371 get_d_register(vn, &data);
3372 bool u = instr->Bit(23) != 0;
3373 if ((opc1_opc2 & 0x8) != 0) {
3374 // NeonS8 / NeonU8
3375 int i = opc1_opc2 & 0x7;
3376 int shift = i * kBitsPerByte;
3377 uint32_t scalar = (data >> shift) & 0xFFu;
3378 if (!u && (scalar & 0x80) != 0) scalar |= 0xffffff00;
3379 set_register(rt, scalar);
3380 } else if ((opc1_opc2 & 0x1) != 0) {
3381 // NeonS16 / NeonU16
3382 int i = (opc1_opc2 >> 1) & 0x3;
3383 int shift = i * kBitsPerByte * kShortSize;
3384 uint32_t scalar = (data >> shift) & 0xFFFFu;
3385 if (!u && (scalar & 0x8000) != 0) scalar |= 0xffff0000;
3386 set_register(rt, scalar);
3387 } else {
3388 UNREACHABLE(); // Not used by V8.
3389 }
3390 }
3298 } else if ((instr->VLValue() == 0x1) && 3391 } else if ((instr->VLValue() == 0x1) &&
3299 (instr->VCValue() == 0x0) && 3392 (instr->VCValue() == 0x0) &&
3300 (instr->VAValue() == 0x7) && 3393 (instr->VAValue() == 0x7) &&
3301 (instr->Bits(19, 16) == 0x1)) { 3394 (instr->Bits(19, 16) == 0x1)) {
3302 // vmrs 3395 // vmrs
3303 uint32_t rt = instr->RtValue(); 3396 uint32_t rt = instr->RtValue();
3304 if (rt == 0xF) { 3397 if (rt == 0xF) {
3305 Copy_FPSCR_to_APSR(); 3398 Copy_FPSCR_to_APSR();
3306 } else { 3399 } else {
3307 // Emulate FPSCR from the Simulator flags. 3400 // Emulate FPSCR from the Simulator flags.
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
3513 return 0; 3606 return 0;
3514 } else { 3607 } else {
3515 if (unsigned_res) { 3608 if (unsigned_res) {
3516 return (val < 0) ? 0 : 0xffffffffu; 3609 return (val < 0) ? 0 : 0xffffffffu;
3517 } else { 3610 } else {
3518 return (val < 0) ? kMinInt : kMaxInt; 3611 return (val < 0) ? kMinInt : kMaxInt;
3519 } 3612 }
3520 } 3613 }
3521 } 3614 }
3522 3615
3616 int32_t Simulator::ConvertDoubleToInt(double val, bool unsigned_integer,
3617 VFPRoundingMode mode, bool neon) {
Rodolph Perfetta (ARM) 2016/12/14 20:27:07 nit: neon is unused.
bbudge 2016/12/14 23:50:52 Done.
3618 int32_t result =
3619 unsigned_integer ? static_cast<uint32_t>(val) : static_cast<int32_t>(val);
3620
3621 inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
3622
3623 double abs_diff = unsigned_integer
3624 ? std::fabs(val - static_cast<uint32_t>(result))
3625 : std::fabs(val - result);
3626
3627 inexact_vfp_flag_ = (abs_diff != 0);
3628
3629 if (inv_op_vfp_flag_) {
3630 result = VFPConversionSaturate(val, unsigned_integer);
3631 } else {
3632 switch (mode) {
3633 case RN: {
3634 int val_sign = (val > 0) ? 1 : -1;
3635 if (abs_diff > 0.5) {
3636 result += val_sign;
3637 } else if (abs_diff == 0.5) {
3638 // Round to even if exactly halfway.
3639 result = ((result % 2) == 0) ? result : result + val_sign;
3640 }
3641 break;
3642 }
3643
3644 case RM:
3645 result = result > val ? result - 1 : result;
3646 break;
3647
3648 case RZ:
3649 // Nothing to do.
3650 break;
3651
3652 default:
3653 UNREACHABLE();
3654 }
3655 }
3656 return result;
3657 }
3523 3658
3524 void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) { 3659 void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
3525 DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7) && 3660 DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7) &&
3526 (instr->Bits(27, 23) == 0x1D)); 3661 (instr->Bits(27, 23) == 0x1D));
3527 DCHECK(((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) || 3662 DCHECK(((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) ||
3528 (((instr->Opc2Value() >> 1) == 0x6) && (instr->Opc3Value() & 0x1))); 3663 (((instr->Opc2Value() >> 1) == 0x6) && (instr->Opc3Value() & 0x1)));
3529 3664
3530 // Conversion between floating-point and integer. 3665 // Conversion between floating-point and integer.
3531 bool to_integer = (instr->Bit(18) == 1); 3666 bool to_integer = (instr->Bit(18) == 1);
3532 3667
(...skipping 16 matching lines...) Expand all
3549 VFPRoundingMode mode = (instr->Bit(7) != 1) ? FPSCR_rounding_mode_ 3684 VFPRoundingMode mode = (instr->Bit(7) != 1) ? FPSCR_rounding_mode_
3550 : RZ; 3685 : RZ;
3551 DCHECK((mode == RM) || (mode == RZ) || (mode == RN)); 3686 DCHECK((mode == RM) || (mode == RZ) || (mode == RN));
3552 3687
3553 bool unsigned_integer = (instr->Bit(16) == 0); 3688 bool unsigned_integer = (instr->Bit(16) == 0);
3554 bool double_precision = (src_precision == kDoublePrecision); 3689 bool double_precision = (src_precision == kDoublePrecision);
3555 3690
3556 double val = double_precision ? get_double_from_d_register(src) 3691 double val = double_precision ? get_double_from_d_register(src)
3557 : get_float_from_s_register(src); 3692 : get_float_from_s_register(src);
3558 3693
3559 int temp = unsigned_integer ? static_cast<uint32_t>(val) 3694 int32_t temp = ConvertDoubleToInt(val, unsigned_integer, mode, false);
3560 : static_cast<int32_t>(val);
3561
3562 inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
3563
3564 double abs_diff =
3565 unsigned_integer ? std::fabs(val - static_cast<uint32_t>(temp))
3566 : std::fabs(val - temp);
3567
3568 inexact_vfp_flag_ = (abs_diff != 0);
3569
3570 if (inv_op_vfp_flag_) {
3571 temp = VFPConversionSaturate(val, unsigned_integer);
3572 } else {
3573 switch (mode) {
3574 case RN: {
3575 int val_sign = (val > 0) ? 1 : -1;
3576 if (abs_diff > 0.5) {
3577 temp += val_sign;
3578 } else if (abs_diff == 0.5) {
3579 // Round to even if exactly halfway.
3580 temp = ((temp % 2) == 0) ? temp : temp + val_sign;
3581 }
3582 break;
3583 }
3584
3585 case RM:
3586 temp = temp > val ? temp - 1 : temp;
3587 break;
3588
3589 case RZ:
3590 // Nothing to do.
3591 break;
3592
3593 default:
3594 UNREACHABLE();
3595 }
3596 }
3597 3695
3598 // Update the destination register. 3696 // Update the destination register.
3599 set_s_register_from_sinteger(dst, temp); 3697 set_s_register_from_sinteger(dst, temp);
3600 3698
3601 } else { 3699 } else {
3602 bool unsigned_integer = (instr->Bit(7) == 0); 3700 bool unsigned_integer = (instr->Bit(7) == 0);
3603 3701
3604 int dst = instr->VFPDRegValue(src_precision); 3702 int dst = instr->VFPDRegValue(src_precision);
3605 int src = instr->VFPMRegValue(kSinglePrecision); 3703 int src = instr->VFPMRegValue(kSinglePrecision);
3606 3704
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
3733 HandleVList(instr); 3831 HandleVList(instr);
3734 break; 3832 break;
3735 default: 3833 default:
3736 UNIMPLEMENTED(); // Not used by V8. 3834 UNIMPLEMENTED(); // Not used by V8.
3737 } 3835 }
3738 } else { 3836 } else {
3739 UNIMPLEMENTED(); // Not used by V8. 3837 UNIMPLEMENTED(); // Not used by V8.
3740 } 3838 }
3741 } 3839 }
3742 3840
3841 #define HIGH_16(x) ((x) >> 16)
3842 #define LOW_16(x) ((x)&0xFFFFu)
3843 #define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu)
3844 #define PAIRWISE_OP(x, y, OP) \
3845 COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y))))
3846
3847 #define ADD_16(x, y) ((x) + (y))
3848 #define SUB_16(x, y) ((x) - (y))
3849 #define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0)
3850 #define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0)
3743 3851
3744 void Simulator::DecodeSpecialCondition(Instruction* instr) { 3852 void Simulator::DecodeSpecialCondition(Instruction* instr) {
3745 switch (instr->SpecialValue()) { 3853 switch (instr->SpecialValue()) {
3746 case 4: 3854 case 4:
3747 if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 && 3855 if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 &&
3748 instr->Bit(4) == 1) { 3856 instr->Bit(4) == 1) {
3749 // vmov Qd, Qm 3857 // vmov Qd, Qm
3750 int Vd = instr->VFPDRegValue(kSimd128Precision); 3858 int Vd = instr->VFPDRegValue(kSimd128Precision);
3751 int Vm = instr->VFPMRegValue(kSimd128Precision); 3859 int Vm = instr->VFPMRegValue(kSimd128Precision);
3752 uint32_t data[4]; 3860 uint32_t data[4];
3753 get_q_register(Vm, data); 3861 get_q_register(Vm, data);
3754 set_q_register(Vd, data); 3862 set_q_register(Vd, data);
3863 } else if (instr->Bits(11, 8) == 8) {
3864 // vadd/vtst
3865 int size = static_cast<NeonSize>(instr->Bits(21, 20));
3866 int Vd = instr->VFPDRegValue(kSimd128Precision);
3867 int Vm = instr->VFPMRegValue(kSimd128Precision);
3868 int Vn = instr->VFPNRegValue(kSimd128Precision);
3869 uint32_t src1[4], src2[4];
3870 get_q_register(Vn, src1);
3871 get_q_register(Vm, src2);
3872 if (instr->Bit(4) == 0) {
3873 // vadd.i<size> Qd, Qn, Qm.
3874 switch (size) {
3875 case Neon8: {
3876 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
3877 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
3878 for (int i = 0; i < 16; i++) {
3879 s1[i] += s2[i];
3880 }
3881 break;
3882 }
3883 case Neon16: {
3884 for (int i = 0; i < 4; i++) {
3885 src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16);
3886 }
3887 break;
3888 }
3889 case Neon32: {
3890 for (int i = 0; i < 4; i++) {
3891 src1[i] += src2[i];
3892 }
3893 break;
3894 }
3895 default:
3896 UNREACHABLE();
3897 break;
3898 }
3899 } else {
3900 // vtst.i<size> Qd, Qn, Qm.
3901 switch (size) {
3902 case Neon8: {
3903 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
3904 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
3905 for (int i = 0; i < 16; i++) {
3906 s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0;
3907 }
3908 break;
3909 }
3910 case Neon16: {
3911 for (int i = 0; i < 4; i++) {
3912 src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16);
3913 }
3914 break;
3915 }
3916 case Neon32: {
3917 for (int i = 0; i < 4; i++) {
3918 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
3919 }
3920 break;
3921 }
3922 default:
3923 UNREACHABLE();
3924 break;
3925 }
3926 }
3927 set_q_register(Vd, src1);
3928 } else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xd &&
3929 instr->Bit(4) == 0) {
3930 int Vd = instr->VFPDRegValue(kSimd128Precision);
3931 int Vm = instr->VFPMRegValue(kSimd128Precision);
3932 int Vn = instr->VFPNRegValue(kSimd128Precision);
3933 uint32_t src1[4], src2[4];
3934 get_q_register(Vn, src1);
3935 get_q_register(Vm, src2);
3936 for (int i = 0; i < 4; i++) {
3937 if (instr->Bit(21) == 0) {
3938 // vadd.f32 Qd, Qn, Qm.
3939 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) +
3940 bit_cast<float>(src2[i]));
3941 } else {
3942 // vsub.f32 Qd, Qn, Qm (computes Qn - Qm).
3943 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) -
3944 bit_cast<float>(src2[i]));
3945 }
3946 }
3947 set_q_register(Vd, src1);
3755 } else { 3948 } else {
3756 UNIMPLEMENTED(); 3949 UNIMPLEMENTED();
3757 } 3950 }
3758 break; 3951 break;
3759 case 5: 3952 case 5:
3760 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 3953 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
3761 (instr->Bit(4) == 1)) { 3954 (instr->Bit(4) == 1)) {
3762 // vmovl signed 3955 // vmovl signed
3763 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 3956 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
3764 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 3957 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
3765 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 3958 int Vm = (instr->Bit(5) << 4) | instr->VmValue();
3766 int imm3 = instr->Bits(21, 19); 3959 int imm3 = instr->Bits(21, 19);
3767 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 3960 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();
3768 int esize = 8 * imm3; 3961 int esize = 8 * imm3;
3769 int elements = 64 / esize; 3962 int elements = 64 / esize;
3770 int8_t from[8]; 3963 int8_t from[8];
3771 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 3964 get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
3772 int16_t to[8]; 3965 int16_t to[8];
3773 int e = 0; 3966 int e = 0;
3774 while (e < elements) { 3967 while (e < elements) {
3775 to[e] = from[e]; 3968 to[e] = from[e];
3776 e++; 3969 e++;
3777 } 3970 }
3778 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); 3971 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
3779 } else { 3972 } else {
3780 UNIMPLEMENTED(); 3973 UNIMPLEMENTED();
3781 } 3974 }
3782 break; 3975 break;
3783 case 6: 3976 case 6:
3784 if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && 3977 if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) {
3785 instr->Bit(4) == 1) { 3978 // vsub.size Qd, Qn, Qm (computes Qn - Qm).
3979 int size = static_cast<NeonSize>(instr->Bits(21, 20));
3980 int Vd = instr->VFPDRegValue(kSimd128Precision);
3981 int Vm = instr->VFPMRegValue(kSimd128Precision);
3982 int Vn = instr->VFPNRegValue(kSimd128Precision);
3983 uint32_t src1[4], src2[4];
3984 get_q_register(Vn, src1);
3985 get_q_register(Vm, src2);
3986 switch (size) {
3987 case Neon8: {
3988 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
3989 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
3990 for (int i = 0; i < 16; i++) {
3991 s1[i] -= s2[i];
3992 }
3993 break;
3994 }
3995 case Neon16: {
3996 for (int i = 0; i < 4; i++) {
3997 src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16);
3998 }
3999 break;
4000 }
4001 case Neon32: {
4002 for (int i = 0; i < 4; i++) {
4003 src1[i] -= src2[i];
4004 }
4005 break;
4006 }
4007 default:
4008 UNREACHABLE();
4009 break;
4010 }
4011 set_q_register(Vd, src1);
4012 } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) {
4013 // vceq.size Qd, Qn, Qm.
4014 int size = static_cast<NeonSize>(instr->Bits(21, 20));
4015 int Vd = instr->VFPDRegValue(kSimd128Precision);
4016 int Vm = instr->VFPMRegValue(kSimd128Precision);
4017 int Vn = instr->VFPNRegValue(kSimd128Precision);
4018 uint32_t src1[4], src2[4];
4019 get_q_register(Vn, src1);
4020 get_q_register(Vm, src2);
4021 switch (size) {
4022 case Neon8: {
4023 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
4024 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
4025 for (int i = 0; i < 16; i++) {
4026 s1[i] = s1[i] == s2[i] ? 0xFF : 0;
4027 }
4028 break;
4029 }
4030 case Neon16: {
4031 for (int i = 0; i < 4; i++) {
4032 src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16);
4033 }
4034 break;
4035 }
4036 case Neon32: {
4037 for (int i = 0; i < 4; i++) {
4038 src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0;
4039 }
4040 break;
4041 }
4042 default:
4043 UNREACHABLE();
4044 break;
4045 }
4046 set_q_register(Vd, src1);
4047 } else if (instr->Bits(21, 20) == 1 && instr->Bits(11, 8) == 1 &&
4048 instr->Bit(4) == 1) {
4049 // vbsl.size Qd, Qn, Qm (Qd selects bits: Qn where set, Qm where clear).
4050 int Vd = instr->VFPDRegValue(kSimd128Precision);
4051 int Vm = instr->VFPMRegValue(kSimd128Precision);
4052 int Vn = instr->VFPNRegValue(kSimd128Precision);
4053 uint32_t dst[4], src1[4], src2[4];
4054 get_q_register(Vd, dst);
4055 get_q_register(Vn, src1);
4056 get_q_register(Vm, src2);
4057 for (int i = 0; i < 4; i++) {
4058 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
4059 }
4060 set_q_register(Vd, dst);
4061 } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
4062 instr->Bit(4) == 1) {
3786 if (instr->Bit(6) == 0) { 4063 if (instr->Bit(6) == 0) {
3787 // veor Dd, Dn, Dm 4064 // veor Dd, Dn, Dm
3788 int Vd = instr->VFPDRegValue(kDoublePrecision); 4065 int Vd = instr->VFPDRegValue(kDoublePrecision);
3789 int Vn = instr->VFPNRegValue(kDoublePrecision); 4066 int Vn = instr->VFPNRegValue(kDoublePrecision);
3790 int Vm = instr->VFPMRegValue(kDoublePrecision); 4067 int Vm = instr->VFPMRegValue(kDoublePrecision);
3791 uint64_t n_data, m_data; 4068 uint64_t n_data, m_data;
3792 get_d_register(Vn, &n_data); 4069 get_d_register(Vn, &n_data);
3793 get_d_register(Vm, &m_data); 4070 get_d_register(Vm, &m_data);
3794 n_data ^= m_data; 4071 n_data ^= m_data;
3795 set_d_register(Vd, &n_data); 4072 set_d_register(Vd, &n_data);
(...skipping 26 matching lines...) Expand all
3822 int elements = 64 / esize; 4099 int elements = 64 / esize;
3823 uint8_t from[8]; 4100 uint8_t from[8];
3824 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 4101 get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
3825 uint16_t to[8]; 4102 uint16_t to[8];
3826 int e = 0; 4103 int e = 0;
3827 while (e < elements) { 4104 while (e < elements) {
3828 to[e] = from[e]; 4105 to[e] = from[e];
3829 e++; 4106 e++;
3830 } 4107 }
3831 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); 4108 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
4109 } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB &&
4110 instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 &&
4111 instr->Bit(4) == 0) {
4112 // vcvt.<Td>.<Tm> Qd, Qm.
4113 int Vd = instr->VFPDRegValue(kSimd128Precision);
4114 int Vm = instr->VFPMRegValue(kSimd128Precision);
4115 uint32_t q_data[4];
4116 get_q_register(Vm, q_data);
4117 int op = instr->Bits(8, 7);
4118 for (int i = 0; i < 4; i++) {
4119 switch (op) {
4120 case 0:
4121 // f32 <- s32, round towards nearest.
4122 q_data[i] = bit_cast<uint32_t>(
4123 std::round(static_cast<float>(bit_cast<int32_t>(q_data[i]))));
4124 break;
4125 case 1:
4126 // f32 <- u32, round towards nearest.
4127 q_data[i] =
4128 bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i])));
4129 break;
4130 case 2:
4131 // s32 <- f32, round to zero.
4132 q_data[i] = static_cast<uint32_t>(ConvertDoubleToInt(
4133 bit_cast<float>(q_data[i]), false, RZ, true));
4134 break;
4135 case 3:
4136 // u32 <- f32, round to zero.
4137 q_data[i] = static_cast<uint32_t>(ConvertDoubleToInt(
4138 bit_cast<float>(q_data[i]), true, RZ, true));
4139 break;
4140 }
4141 }
4142 set_q_register(Vd, q_data);
3832 } else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) && 4143 } else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
3833 (instr->Bit(4) == 0)) { 4144 (instr->Bit(4) == 0)) {
3834 if (instr->Bit(6) == 0) { 4145 if (instr->Bit(6) == 0) {
3835 // vswp Dd, Dm. 4146 // vswp Dd, Dm.
3836 uint64_t dval, mval; 4147 uint64_t dval, mval;
3837 int vd = instr->VFPDRegValue(kDoublePrecision); 4148 int vd = instr->VFPDRegValue(kDoublePrecision);
3838 int vm = instr->VFPMRegValue(kDoublePrecision); 4149 int vm = instr->VFPMRegValue(kDoublePrecision);
3839 get_d_register(vd, &dval); 4150 get_d_register(vd, &dval);
3840 get_d_register(vm, &mval); 4151 get_d_register(vm, &mval);
3841 set_d_register(vm, &dval); 4152 set_d_register(vm, &dval);
3842 set_d_register(vd, &mval); 4153 set_d_register(vd, &mval);
3843 } else { 4154 } else {
3844 // vswp Qd, Qm. 4155 // vswp Qd, Qm.
3845 uint32_t dval[4], mval[4]; 4156 uint32_t dval[4], mval[4];
3846 int vd = instr->VFPDRegValue(kSimd128Precision); 4157 int vd = instr->VFPDRegValue(kSimd128Precision);
3847 int vm = instr->VFPMRegValue(kSimd128Precision); 4158 int vm = instr->VFPMRegValue(kSimd128Precision);
3848 get_q_register(vd, dval); 4159 get_q_register(vd, dval);
3849 get_q_register(vm, mval); 4160 get_q_register(vm, mval);
3850 set_q_register(vm, dval); 4161 set_q_register(vm, dval);
3851 set_q_register(vd, mval); 4162 set_q_register(vd, mval);
3852 } 4163 }
4164 } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 &&
4165 instr->Bit(4) == 0x0) {
4166 // vdup.32 Qd, Sm.
4167 int vd = instr->VFPDRegValue(kSimd128Precision);
4168 int vm = instr->VFPMRegValue(kDoublePrecision);
4169 int index = instr->Bit(19);
4170 uint32_t s_data = get_s_register(vm * 2 + index);
4171 uint32_t q_data[4];
4172 for (int i = 0; i < 4; i++) q_data[i] = s_data;
4173 set_q_register(vd, q_data);
4174 } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 &&
4175 instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) {
4176 // vmvn Qd, Qm.
4177 int vd = instr->VFPDRegValue(kSimd128Precision);
4178 int vm = instr->VFPMRegValue(kSimd128Precision);
4179 uint32_t q_data[4];
4180 get_q_register(vm, q_data);
4181 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
4182 set_q_register(vd, q_data);
4183 } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 &&
4184 instr->Bit(4) == 0x0) {
4185 // vtb[l,x] Dd, <list>, Dm.
4186 int vd = instr->VFPDRegValue(kDoublePrecision);
4187 int vn = instr->VFPNRegValue(kDoublePrecision);
4188 int vm = instr->VFPMRegValue(kDoublePrecision);
4189 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
4190 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
4191 uint64_t destination = 0, indices = 0, result = 0;
4192 get_d_register(vd, &destination);
4193 get_d_register(vm, &indices);
4194 for (int i = 0; i < kDoubleSize; i++) {
4195 int shift = i * kBitsPerByte;
4196 int index = (indices >> shift) & 0xFF;
4197 if (index < table_len) {
4198 uint64_t table;
4199 get_d_register(vn + index / kDoubleSize, &table);
4200 result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
4201 << shift;
4202 } else if (vtbx) {
4203 result |= destination & (0xFFull << shift);
4204 }
4205 }
4206 set_d_register(vd, &result);
3853 } else { 4207 } else {
3854 UNIMPLEMENTED(); 4208 UNIMPLEMENTED();
3855 } 4209 }
3856 break; 4210 break;
3857 case 8: 4211 case 8:
3858 if (instr->Bits(21, 20) == 0) { 4212 if (instr->Bits(21, 20) == 0) {
3859 // vst1 4213 // vst1
3860 int Vd = (instr->Bit(22) << 4) | instr->VdValue(); 4214 int Vd = (instr->Bit(22) << 4) | instr->VdValue();
3861 int Rn = instr->VnValue(); 4215 int Rn = instr->VnValue();
3862 int type = instr->Bits(11, 8); 4216 int type = instr->Bits(11, 8);
(...skipping 500 matching lines...) Expand 10 before | Expand all | Expand 10 after
4363 set_register(sp, current_sp + sizeof(uintptr_t)); 4717 set_register(sp, current_sp + sizeof(uintptr_t));
4364 return address; 4718 return address;
4365 } 4719 }
4366 4720
4367 } // namespace internal 4721 } // namespace internal
4368 } // namespace v8 4722 } // namespace v8
4369 4723
4370 #endif // USE_SIMULATOR 4724 #endif // USE_SIMULATOR
4371 4725
4372 #endif // V8_TARGET_ARCH_ARM 4726 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698