OLD | NEW |
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. | 1 // Copyright (c) 1994-2006 Sun Microsystems Inc. |
2 // All Rights Reserved. | 2 // All Rights Reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions | 5 // modification, are permitted provided that the following conditions |
6 // are met: | 6 // are met: |
7 // | 7 // |
8 // - Redistributions of source code must retain the above copyright notice, | 8 // - Redistributions of source code must retain the above copyright notice, |
9 // this list of conditions and the following disclaimer. | 9 // this list of conditions and the following disclaimer. |
10 // | 10 // |
(...skipping 3036 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3047 // Split five bit reg_code based on size of reg_type. | 3047 // Split five bit reg_code based on size of reg_type. |
3048 // 32-bit register codes are Vm:M | 3048 // 32-bit register codes are Vm:M |
3049 // 64-bit register codes are M:Vm | 3049 // 64-bit register codes are M:Vm |
3050 // where Vm is four bits, and M is a single bit. | 3050 // where Vm is four bits, and M is a single bit. |
3051 static void SplitRegCode(VFPType reg_type, | 3051 static void SplitRegCode(VFPType reg_type, |
3052 int reg_code, | 3052 int reg_code, |
3053 int* vm, | 3053 int* vm, |
3054 int* m) { | 3054 int* m) { |
3055 DCHECK((reg_code >= 0) && (reg_code <= 31)); | 3055 DCHECK((reg_code >= 0) && (reg_code <= 31)); |
3056 if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) { | 3056 if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) { |
3057 // 32 bit type. | 3057 SwVfpRegister::split_code(reg_code, vm, m); |
3058 *m = reg_code & 0x1; | |
3059 *vm = reg_code >> 1; | |
3060 } else { | 3058 } else { |
3061 // 64 bit type. | 3059 DwVfpRegister::split_code(reg_code, vm, m); |
3062 *m = (reg_code & 0x10) >> 4; | |
3063 *vm = reg_code & 0x0F; | |
3064 } | 3060 } |
3065 } | 3061 } |
3066 | 3062 |
3067 | 3063 |
3068 // Encode vcvt.src_type.dst_type instruction. | 3064 // Encode vcvt.src_type.dst_type instruction. |
3069 static Instr EncodeVCVT(const VFPType dst_type, | 3065 static Instr EncodeVCVT(const VFPType dst_type, |
3070 const int dst_code, | 3066 const int dst_code, |
3071 const VFPType src_type, | 3067 const VFPType src_type, |
3072 const int src_code, | 3068 const int src_code, |
3073 VFPConversionMode mode, | 3069 VFPConversionMode mode, |
(...skipping 822 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3896 // Instruction details available in ARM DDI 0406C.b, A8.8.320. | 3892 // Instruction details available in ARM DDI 0406C.b, A8.8.320. |
3897 // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | | 3893 // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | |
3898 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) | 3894 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
3899 DCHECK(IsEnabled(NEON)); | 3895 DCHECK(IsEnabled(NEON)); |
3900 int vd, d; | 3896 int vd, d; |
3901 dst.base().split_code(&vd, &d); | 3897 dst.base().split_code(&vd, &d); |
3902 emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | | 3898 emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | |
3903 dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); | 3899 dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); |
3904 } | 3900 } |
3905 | 3901 |
3906 | 3902 void Assembler::vst1(NeonSize size, const NeonListOperand& src, |
3907 void Assembler::vst1(NeonSize size, | |
3908 const NeonListOperand& src, | |
3909 const NeonMemOperand& dst) { | 3903 const NeonMemOperand& dst) { |
3910 // Instruction details available in ARM DDI 0406C.b, A8.8.404. | 3904 // Instruction details available in ARM DDI 0406C.b, A8.8.404. |
3911 // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | | 3905 // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | |
3912 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) | 3906 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
3913 DCHECK(IsEnabled(NEON)); | 3907 DCHECK(IsEnabled(NEON)); |
3914 int vd, d; | 3908 int vd, d; |
3915 src.base().split_code(&vd, &d); | 3909 src.base().split_code(&vd, &d); |
3916 emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | | 3910 emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | |
3917 size*B6 | dst.align()*B4 | dst.rm().code()); | 3911 size*B6 | dst.align()*B4 | dst.rm().code()); |
3918 } | 3912 } |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3977 // vmov Arm scalar to core register. | 3971 // vmov Arm scalar to core register. |
3978 DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); | 3972 DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); |
3979 int vn, n; | 3973 int vn, n; |
3980 src.split_code(&vn, &n); | 3974 src.split_code(&vn, &n); |
3981 int opc1_opc2 = EncodeScalar(dt, index); | 3975 int opc1_opc2 = EncodeScalar(dt, index); |
3982 int u = NeonU(dt); | 3976 int u = NeonU(dt); |
3983 emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | | 3977 emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | |
3984 n * B7 | B4 | opc1_opc2); | 3978 n * B7 | B4 | opc1_opc2); |
3985 } | 3979 } |
3986 | 3980 |
3987 void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { | 3981 void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) { |
3988 // Instruction details available in ARM DDI 0406C.b, A8-938. | 3982 // Instruction details available in ARM DDI 0406C.b, A8-938. |
3989 // vmov is encoded as vorr. | 3983 // vmov is encoded as vorr. |
3990 vorr(dst, src, src); | 3984 vorr(dst, src, src); |
3991 } | 3985 } |
3992 | 3986 |
3993 void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) { | 3987 void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) { |
3994 DCHECK(IsEnabled(NEON)); | |
3995 // Instruction details available in ARM DDI 0406C.b, A8-966. | |
3996 DCHECK(VfpRegisterIsAvailable(dst)); | |
3997 DCHECK(VfpRegisterIsAvailable(src)); | |
3998 int vd, d; | |
3999 dst.split_code(&vd, &d); | |
4000 int vm, m; | |
4001 src.split_code(&vm, &m); | |
4002 emit(0x1E7U * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm); | |
4003 } | |
4004 | |
4005 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { | |
4006 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
4007 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | | |
4008 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) | |
4009 DCHECK(IsEnabled(NEON)); | |
4010 int vd, d; | |
4011 dst.split_code(&vd, &d); | |
4012 int vm, m; | |
4013 src.split_code(&vm, &m); | |
4014 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm); | |
4015 } | |
4016 | |
4017 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { | |
4018 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
4019 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | | |
4020 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) | |
4021 DCHECK(IsEnabled(NEON)); | |
4022 int vd, d; | |
4023 dst.split_code(&vd, &d); | |
4024 int vm, m; | |
4025 src.split_code(&vm, &m); | |
4026 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | B6 | m * B5 | | |
4027 vm); | |
4028 } | |
4029 | |
4030 void Assembler::vdup(NeonSize size, const QwNeonRegister dst, | |
4031 const Register src) { | |
4032 DCHECK(IsEnabled(NEON)); | 3988 DCHECK(IsEnabled(NEON)); |
4033 // Instruction details available in ARM DDI 0406C.b, A8-886. | 3989 // Instruction details available in ARM DDI 0406C.b, A8-886. |
4034 int B = 0, E = 0; | 3990 int B = 0, E = 0; |
4035 switch (size) { | 3991 switch (size) { |
4036 case Neon8: | 3992 case Neon8: |
4037 B = 1; | 3993 B = 1; |
4038 break; | 3994 break; |
4039 case Neon16: | 3995 case Neon16: |
4040 E = 1; | 3996 E = 1; |
4041 break; | 3997 break; |
4042 case Neon32: | 3998 case Neon32: |
4043 break; | 3999 break; |
4044 default: | 4000 default: |
4045 UNREACHABLE(); | 4001 UNREACHABLE(); |
4046 break; | 4002 break; |
4047 } | 4003 } |
4048 int vd, d; | 4004 int vd, d; |
4049 dst.split_code(&vd, &d); | 4005 dst.split_code(&vd, &d); |
4050 | 4006 |
4051 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | | 4007 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | |
4052 0xB * B8 | d * B7 | E * B5 | B4); | 4008 0xB * B8 | d * B7 | E * B5 | B4); |
4053 } | 4009 } |
4054 | 4010 |
4055 void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) { | 4011 void Assembler::vdup(QwNeonRegister dst, SwVfpRegister src) { |
4056 DCHECK(IsEnabled(NEON)); | 4012 DCHECK(IsEnabled(NEON)); |
4057 // Instruction details available in ARM DDI 0406C.b, A8-884. | 4013 // Instruction details available in ARM DDI 0406C.b, A8-884. |
4058 int index = src.code() & 1; | 4014 int index = src.code() & 1; |
4059 int d_reg = src.code() / 2; | 4015 int d_reg = src.code() / 2; |
4060 int imm4 = 4 | index << 3; // esize = 32, index in bit 3. | 4016 int imm4 = 4 | index << 3; // esize = 32, index in bit 3. |
4061 int vd, d; | 4017 int vd, d; |
4062 dst.split_code(&vd, &d); | 4018 dst.split_code(&vd, &d); |
4063 int vm, m; | 4019 int vm, m; |
4064 DwVfpRegister::from_code(d_reg).split_code(&vm, &m); | 4020 DwVfpRegister::from_code(d_reg).split_code(&vm, &m); |
4065 | 4021 |
4066 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | | 4022 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | |
4067 B6 | m * B5 | vm); | 4023 B6 | m * B5 | vm); |
4068 } | 4024 } |
4069 | 4025 |
4070 // Encode NEON vcvt.src_type.dst_type instruction. | 4026 // Encode NEON vcvt.src_type.dst_type instruction. |
4071 static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst, | 4027 static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst, |
4072 const VFPType src_type, const QwNeonRegister src) { | 4028 VFPType src_type, QwNeonRegister src) { |
4073 DCHECK(src_type != dst_type); | 4029 DCHECK(src_type != dst_type); |
4074 DCHECK(src_type == F32 || dst_type == F32); | 4030 DCHECK(src_type == F32 || dst_type == F32); |
4075 // Instruction details available in ARM DDI 0406C.b, A8.8.868. | 4031 // Instruction details available in ARM DDI 0406C.b, A8.8.868. |
4076 int vd, d; | 4032 int vd, d; |
4077 dst.split_code(&vd, &d); | 4033 dst.split_code(&vd, &d); |
4078 int vm, m; | 4034 int vm, m; |
4079 src.split_code(&vm, &m); | 4035 src.split_code(&vm, &m); |
4080 | 4036 |
4081 int op = 0; | 4037 int op = 0; |
4082 if (src_type == F32) { | 4038 if (src_type == F32) { |
4083 DCHECK(dst_type == S32 || dst_type == U32); | 4039 DCHECK(dst_type == S32 || dst_type == U32); |
4084 op = dst_type == U32 ? 3 : 2; | 4040 op = dst_type == U32 ? 3 : 2; |
4085 } else { | 4041 } else { |
4086 DCHECK(src_type == S32 || src_type == U32); | 4042 DCHECK(src_type == S32 || src_type == U32); |
4087 op = src_type == U32 ? 1 : 0; | 4043 op = src_type == U32 ? 1 : 0; |
4088 } | 4044 } |
4089 | 4045 |
4090 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | | 4046 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | |
4091 B6 | m * B5 | vm; | 4047 B6 | m * B5 | vm; |
4092 } | 4048 } |
4093 | 4049 |
4094 void Assembler::vcvt_f32_s32(const QwNeonRegister dst, | 4050 void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) { |
4095 const QwNeonRegister src) { | |
4096 DCHECK(IsEnabled(NEON)); | 4051 DCHECK(IsEnabled(NEON)); |
4097 DCHECK(VfpRegisterIsAvailable(dst)); | 4052 DCHECK(VfpRegisterIsAvailable(dst)); |
4098 DCHECK(VfpRegisterIsAvailable(src)); | 4053 DCHECK(VfpRegisterIsAvailable(src)); |
4099 emit(EncodeNeonVCVT(F32, dst, S32, src)); | 4054 emit(EncodeNeonVCVT(F32, dst, S32, src)); |
4100 } | 4055 } |
4101 | 4056 |
4102 void Assembler::vcvt_f32_u32(const QwNeonRegister dst, | 4057 void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) { |
4103 const QwNeonRegister src) { | |
4104 DCHECK(IsEnabled(NEON)); | 4058 DCHECK(IsEnabled(NEON)); |
4105 DCHECK(VfpRegisterIsAvailable(dst)); | 4059 DCHECK(VfpRegisterIsAvailable(dst)); |
4106 DCHECK(VfpRegisterIsAvailable(src)); | 4060 DCHECK(VfpRegisterIsAvailable(src)); |
4107 emit(EncodeNeonVCVT(F32, dst, U32, src)); | 4061 emit(EncodeNeonVCVT(F32, dst, U32, src)); |
4108 } | 4062 } |
4109 | 4063 |
4110 void Assembler::vcvt_s32_f32(const QwNeonRegister dst, | 4064 void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) { |
4111 const QwNeonRegister src) { | |
4112 DCHECK(IsEnabled(NEON)); | 4065 DCHECK(IsEnabled(NEON)); |
4113 DCHECK(VfpRegisterIsAvailable(dst)); | 4066 DCHECK(VfpRegisterIsAvailable(dst)); |
4114 DCHECK(VfpRegisterIsAvailable(src)); | 4067 DCHECK(VfpRegisterIsAvailable(src)); |
4115 emit(EncodeNeonVCVT(S32, dst, F32, src)); | 4068 emit(EncodeNeonVCVT(S32, dst, F32, src)); |
4116 } | 4069 } |
4117 | 4070 |
4118 void Assembler::vcvt_u32_f32(const QwNeonRegister dst, | 4071 void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) { |
4119 const QwNeonRegister src) { | |
4120 DCHECK(IsEnabled(NEON)); | 4072 DCHECK(IsEnabled(NEON)); |
4121 DCHECK(VfpRegisterIsAvailable(dst)); | 4073 DCHECK(VfpRegisterIsAvailable(dst)); |
4122 DCHECK(VfpRegisterIsAvailable(src)); | 4074 DCHECK(VfpRegisterIsAvailable(src)); |
4123 emit(EncodeNeonVCVT(U32, dst, F32, src)); | 4075 emit(EncodeNeonVCVT(U32, dst, F32, src)); |
4124 } | 4076 } |
4125 | 4077 |
4126 enum UnaryOp { VABS, VABSF, VNEG, VNEGF }; | 4078 enum NeonRegType { NEON_D, NEON_Q }; |
4127 | 4079 |
4128 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonSize size, QwNeonRegister dst, | 4080 enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF }; |
4129 QwNeonRegister src) { | 4081 |
| 4082 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size, |
| 4083 int dst_code, int src_code) { |
4130 int op_encoding = 0; | 4084 int op_encoding = 0; |
4131 switch (op) { | 4085 switch (op) { |
| 4086 case VMVN: |
| 4087 DCHECK_EQ(Neon8, size); // size == 0 for vmvn |
| 4088 op_encoding = B10 | 0x3 * B7; |
| 4089 break; |
| 4090 case VSWP: |
| 4091 DCHECK_EQ(Neon8, size); // size == 0 for vswp |
| 4092 op_encoding = B17; |
| 4093 break; |
4132 case VABS: | 4094 case VABS: |
4133 op_encoding = 0x6 * B7; | 4095 op_encoding = B16 | 0x6 * B7; |
4134 break; | 4096 break; |
4135 case VABSF: | 4097 case VABSF: |
4136 DCHECK_EQ(Neon32, size); | 4098 DCHECK_EQ(Neon32, size); |
4137 op_encoding = 0x6 * B7 | B10; | 4099 op_encoding = B16 | B10 | 0x6 * B7; |
4138 break; | 4100 break; |
4139 case VNEG: | 4101 case VNEG: |
4140 op_encoding = 0x7 * B7; | 4102 op_encoding = B16 | 0x7 * B7; |
4141 break; | 4103 break; |
4142 case VNEGF: | 4104 case VNEGF: |
4143 DCHECK_EQ(Neon32, size); | 4105 DCHECK_EQ(Neon32, size); |
4144 op_encoding = 0x7 * B7 | B10; | 4106 op_encoding = B16 | B10 | 0x7 * B7; |
4145 break; | 4107 break; |
4146 default: | 4108 default: |
4147 UNREACHABLE(); | 4109 UNREACHABLE(); |
4148 break; | 4110 break; |
4149 } | 4111 } |
4150 int vd, d; | 4112 int vd, d, vm, m; |
4151 dst.split_code(&vd, &d); | 4113 if (reg_type == NEON_Q) { |
4152 int vm, m; | 4114 op_encoding |= B6; |
4153 src.split_code(&vm, &m); | 4115 QwNeonRegister::split_code(dst_code, &vd, &d); |
4154 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 | B6 | | 4116 QwNeonRegister::split_code(src_code, &vm, &m); |
4155 m * B5 | vm | op_encoding; | 4117 } else { |
| 4118 DCHECK_EQ(reg_type, NEON_D); |
| 4119 DwVfpRegister::split_code(dst_code, &vd, &d); |
| 4120 DwVfpRegister::split_code(src_code, &vm, &m); |
| 4121 } |
| 4122 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 | |
| 4123 vm | op_encoding; |
4156 } | 4124 } |
4157 | 4125 |
4158 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) { | 4126 void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) { |
| 4127 // Qd = vmvn(Qn, Qm) SIMD bitwise negate. |
| 4128 // Instruction details available in ARM DDI 0406C.b, A8-966. |
| 4129 DCHECK(IsEnabled(NEON)); |
| 4130 emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code())); |
| 4131 } |
| 4132 |
| 4133 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { |
| 4134 DCHECK(IsEnabled(NEON)); |
| 4135 // Dd = vswp(Dn, Dm) SIMD d-register swap. |
| 4136 // Instruction details available in ARM DDI 0406C.b, A8.8.418. |
| 4137 DCHECK(IsEnabled(NEON)); |
| 4138 emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code())); |
| 4139 } |
| 4140 |
| 4141 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { |
| 4142 // Qd = vswp(Qn, Qm) SIMD q-register swap. |
| 4143 // Instruction details available in ARM DDI 0406C.b, A8.8.418. |
| 4144 DCHECK(IsEnabled(NEON)); |
| 4145 emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code())); |
| 4146 } |
| 4147 |
| 4148 void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) { |
4159 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value. | 4149 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value. |
4160 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | 4150 // Instruction details available in ARM DDI 0406C.b, A8.8.824. |
4161 DCHECK(IsEnabled(NEON)); | 4151 DCHECK(IsEnabled(NEON)); |
4162 emit(EncodeNeonUnaryOp(VABSF, Neon32, dst, src)); | 4152 emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code())); |
4163 } | 4153 } |
4164 | 4154 |
4165 void Assembler::vabs(NeonSize size, const QwNeonRegister dst, | 4155 void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4166 const QwNeonRegister src) { | |
4167 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value. | 4156 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value. |
4168 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | 4157 // Instruction details available in ARM DDI 0406C.b, A8.8.824. |
4169 DCHECK(IsEnabled(NEON)); | 4158 DCHECK(IsEnabled(NEON)); |
4170 emit(EncodeNeonUnaryOp(VABS, size, dst, src)); | 4159 emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code())); |
4171 } | 4160 } |
4172 | 4161 |
4173 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) { | 4162 void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) { |
4174 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point negate. | 4163 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point negate. |
4175 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | 4164 // Instruction details available in ARM DDI 0406C.b, A8.8.968. |
4176 DCHECK(IsEnabled(NEON)); | 4165 DCHECK(IsEnabled(NEON)); |
4177 emit(EncodeNeonUnaryOp(VNEGF, Neon32, dst, src)); | 4166 emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code())); |
4178 } | 4167 } |
4179 | 4168 |
4180 void Assembler::vneg(NeonSize size, const QwNeonRegister dst, | 4169 void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4181 const QwNeonRegister src) { | |
4182 // Qd = vabs.s<size>(Qn, Qm) SIMD integer negate. | 4170 // Qd = vabs.s<size>(Qn, Qm) SIMD integer negate. |
4183 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | 4171 // Instruction details available in ARM DDI 0406C.b, A8.8.968. |
4184 DCHECK(IsEnabled(NEON)); | 4172 DCHECK(IsEnabled(NEON)); |
4185 emit(EncodeNeonUnaryOp(VNEG, size, dst, src)); | 4173 emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code())); |
4186 } | |
4187 | |
4188 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, | |
4189 DwVfpRegister src2) { | |
4190 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. | |
4191 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | |
4192 DCHECK(IsEnabled(NEON)); | |
4193 int vd, d; | |
4194 dst.split_code(&vd, &d); | |
4195 int vn, n; | |
4196 src1.split_code(&vn, &n); | |
4197 int vm, m; | |
4198 src2.split_code(&vm, &m); | |
4199 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | m * B5 | | |
4200 B4 | vm); | |
4201 } | 4174 } |
4202 | 4175 |
4203 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN }; | 4176 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN }; |
4204 | 4177 |
4205 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, QwNeonRegister dst, | 4178 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type, |
4206 QwNeonRegister src1, | 4179 int dst_code, int src_code1, |
4207 QwNeonRegister src2) { | 4180 int src_code2) { |
4208 int op_encoding = 0; | 4181 int op_encoding = 0; |
4209 switch (op) { | 4182 switch (op) { |
4210 case VBIC: | 4183 case VBIC: |
4211 op_encoding = 0x1 * B20; | 4184 op_encoding = 0x1 * B20; |
4212 break; | 4185 break; |
4213 case VBIF: | 4186 case VBIF: |
4214 op_encoding = B24 | 0x3 * B20; | 4187 op_encoding = B24 | 0x3 * B20; |
4215 break; | 4188 break; |
4216 case VBIT: | 4189 case VBIT: |
4217 op_encoding = B24 | 0x2 * B20; | 4190 op_encoding = B24 | 0x2 * B20; |
(...skipping 10 matching lines...) Expand all Loading... |
4228 case VORN: | 4201 case VORN: |
4229 op_encoding = 0x3 * B20; | 4202 op_encoding = 0x3 * B20; |
4230 break; | 4203 break; |
4231 case VAND: | 4204 case VAND: |
4232 // op_encoding is 0. | 4205 // op_encoding is 0. |
4233 break; | 4206 break; |
4234 default: | 4207 default: |
4235 UNREACHABLE(); | 4208 UNREACHABLE(); |
4236 break; | 4209 break; |
4237 } | 4210 } |
4238 int vd, d; | 4211 int vd, d, vn, n, vm, m; |
4239 dst.split_code(&vd, &d); | 4212 if (reg_type == NEON_Q) { |
4240 int vn, n; | 4213 op_encoding |= B6; |
4241 src1.split_code(&vn, &n); | 4214 QwNeonRegister::split_code(dst_code, &vd, &d); |
4242 int vm, m; | 4215 QwNeonRegister::split_code(src_code1, &vn, &n); |
4243 src2.split_code(&vm, &m); | 4216 QwNeonRegister::split_code(src_code2, &vm, &m); |
| 4217 } else { |
| 4218 DCHECK_EQ(reg_type, NEON_D); |
| 4219 DwVfpRegister::split_code(dst_code, &vd, &d); |
| 4220 DwVfpRegister::split_code(src_code1, &vn, &n); |
| 4221 DwVfpRegister::split_code(src_code2, &vm, &m); |
| 4222 } |
4244 return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 | | 4223 return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 | |
4245 n * B7 | B6 | m * B5 | B4 | vm; | 4224 n * B7 | m * B5 | B4 | vm; |
4246 } | 4225 } |
4247 | 4226 |
4248 void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1, | 4227 void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1, |
4249 QwNeonRegister src2) { | 4228 QwNeonRegister src2) { |
4250 // Qd = vand(Qn, Qm) SIMD AND. | 4229 // Qd = vand(Qn, Qm) SIMD AND. |
4251 // Instruction details available in ARM DDI 0406C.b, A8.8.836. | 4230 // Instruction details available in ARM DDI 0406C.b, A8.8.836. |
4252 DCHECK(IsEnabled(NEON)); | 4231 DCHECK(IsEnabled(NEON)); |
4253 emit(EncodeNeonBinaryBitwiseOp(VAND, dst, src1, src2)); | 4232 emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(), |
| 4233 src2.code())); |
4254 } | 4234 } |
4255 | 4235 |
4256 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, | 4236 void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1, |
4257 const QwNeonRegister src2) { | 4237 QwNeonRegister src2) { |
4258 DCHECK(IsEnabled(NEON)); | |
4259 // Qd = vbsl(Qn, Qm) SIMD bitwise select. | 4238 // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
4260 // Instruction details available in ARM DDI 0406C.b, A8-844. | 4239 // Instruction details available in ARM DDI 0406C.b, A8-844. |
4261 emit(EncodeNeonBinaryBitwiseOp(VBSL, dst, src1, src2)); | 4240 DCHECK(IsEnabled(NEON)); |
| 4241 emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(), |
| 4242 src2.code())); |
| 4243 } |
| 4244 |
| 4245 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
| 4246 DwVfpRegister src2) { |
| 4247 // Dd = veor(Dn, Dm) SIMD exclusive OR. |
| 4248 // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
| 4249 DCHECK(IsEnabled(NEON)); |
| 4250 emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(), |
| 4251 src2.code())); |
4262 } | 4252 } |
4263 | 4253 |
4264 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, | 4254 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
4265 QwNeonRegister src2) { | 4255 QwNeonRegister src2) { |
4266 // Qd = veor(Qn, Qm) SIMD exclusive OR. | 4256 // Qd = veor(Qn, Qm) SIMD exclusive OR. |
4267 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | 4257 // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
4268 DCHECK(IsEnabled(NEON)); | 4258 DCHECK(IsEnabled(NEON)); |
4269 emit(EncodeNeonBinaryBitwiseOp(VEOR, dst, src1, src2)); | 4259 emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(), |
| 4260 src2.code())); |
4270 } | 4261 } |
4271 | 4262 |
4272 void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1, | 4263 void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1, |
4273 QwNeonRegister src2) { | 4264 QwNeonRegister src2) { |
4274 // Qd = vorr(Qn, Qm) SIMD OR. | 4265 // Qd = vorr(Qn, Qm) SIMD OR. |
4275 // Instruction details available in ARM DDI 0406C.b, A8.8.976. | 4266 // Instruction details available in ARM DDI 0406C.b, A8.8.976. |
4276 DCHECK(IsEnabled(NEON)); | 4267 DCHECK(IsEnabled(NEON)); |
4277 emit(EncodeNeonBinaryBitwiseOp(VORR, dst, src1, src2)); | 4268 emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(), |
| 4269 src2.code())); |
4278 } | 4270 } |
4279 | 4271 |
4280 enum FPBinOp { | 4272 enum FPBinOp { |
4281 VADDF, | 4273 VADDF, |
4282 VSUBF, | 4274 VSUBF, |
4283 VMULF, | 4275 VMULF, |
4284 VMINF, | 4276 VMINF, |
4285 VMAXF, | 4277 VMAXF, |
4286 VRECPS, | 4278 VRECPS, |
4287 VRSQRTS, | 4279 VRSQRTS, |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4462 } | 4454 } |
4463 | 4455 |
4464 void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1, | 4456 void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1, |
4465 QwNeonRegister src2) { | 4457 QwNeonRegister src2) { |
4466 DCHECK(IsEnabled(NEON)); | 4458 DCHECK(IsEnabled(NEON)); |
4467 // Qd = vadd(Qn, Qm) SIMD floating point multiply. | 4459 // Qd = vadd(Qn, Qm) SIMD floating point multiply. |
4468 // Instruction details available in ARM DDI 0406C.b, A8-958. | 4460 // Instruction details available in ARM DDI 0406C.b, A8-958. |
4469 emit(EncodeNeonBinOp(VMULF, dst, src1, src2)); | 4461 emit(EncodeNeonBinOp(VMULF, dst, src1, src2)); |
4470 } | 4462 } |
4471 | 4463 |
4472 void Assembler::vmul(NeonSize size, QwNeonRegister dst, | 4464 void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, |
4473 const QwNeonRegister src1, const QwNeonRegister src2) { | 4465 QwNeonRegister src2) { |
4474 DCHECK(IsEnabled(NEON)); | 4466 DCHECK(IsEnabled(NEON)); |
4475 // Qd = vadd(Qn, Qm) SIMD integer multiply. | 4467 // Qd = vadd(Qn, Qm) SIMD integer multiply. |
4476 // Instruction details available in ARM DDI 0406C.b, A8-960. | 4468 // Instruction details available in ARM DDI 0406C.b, A8-960. |
4477 emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2)); | 4469 emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2)); |
4478 } | 4470 } |
4479 | 4471 |
4480 void Assembler::vmin(const QwNeonRegister dst, const QwNeonRegister src1, | 4472 void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1, |
4481 const QwNeonRegister src2) { | 4473 QwNeonRegister src2) { |
4482 DCHECK(IsEnabled(NEON)); | 4474 DCHECK(IsEnabled(NEON)); |
4483 // Qd = vmin(Qn, Qm) SIMD floating point MIN. | 4475 // Qd = vmin(Qn, Qm) SIMD floating point MIN. |
4484 // Instruction details available in ARM DDI 0406C.b, A8-928. | 4476 // Instruction details available in ARM DDI 0406C.b, A8-928. |
4485 emit(EncodeNeonBinOp(VMINF, dst, src1, src2)); | 4477 emit(EncodeNeonBinOp(VMINF, dst, src1, src2)); |
4486 } | 4478 } |
4487 | 4479 |
4488 void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, | 4480 void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, |
4489 QwNeonRegister src2) { | 4481 QwNeonRegister src2) { |
4490 DCHECK(IsEnabled(NEON)); | 4482 DCHECK(IsEnabled(NEON)); |
4491 // Qd = vmin(Qn, Qm) SIMD integer MIN. | 4483 // Qd = vmin(Qn, Qm) SIMD integer MIN. |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4585 } | 4577 } |
4586 | 4578 |
4587 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, | 4579 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, |
4588 QwNeonRegister src2) { | 4580 QwNeonRegister src2) { |
4589 DCHECK(IsEnabled(NEON)); | 4581 DCHECK(IsEnabled(NEON)); |
4590 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step. | 4582 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step. |
4591 // Instruction details available in ARM DDI 0406C.b, A8-1040. | 4583 // Instruction details available in ARM DDI 0406C.b, A8-1040. |
4592 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); | 4584 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); |
4593 } | 4585 } |
4594 | 4586 |
4595 enum PairwiseOp { VPMIN, VPMAX }; | 4587 enum NeonPairwiseOp { VPMIN, VPMAX }; |
4596 | 4588 |
4597 static Instr EncodeNeonPairwiseOp(PairwiseOp op, NeonDataType dt, | 4589 static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt, |
4598 DwVfpRegister dst, DwVfpRegister src1, | 4590 DwVfpRegister dst, DwVfpRegister src1, |
4599 DwVfpRegister src2) { | 4591 DwVfpRegister src2) { |
4600 int op_encoding = 0; | 4592 int op_encoding = 0; |
4601 switch (op) { | 4593 switch (op) { |
4602 case VPMIN: | 4594 case VPMIN: |
4603 op_encoding = 0xA * B8 | B4; | 4595 op_encoding = 0xA * B8 | B4; |
4604 break; | 4596 break; |
4605 case VPMAX: | 4597 case VPMAX: |
4606 op_encoding = 0xA * B8; | 4598 op_encoding = 0xA * B8; |
4607 break; | 4599 break; |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4686 } | 4678 } |
4687 | 4679 |
4688 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, | 4680 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, |
4689 QwNeonRegister src2) { | 4681 QwNeonRegister src2) { |
4690 DCHECK(IsEnabled(NEON)); | 4682 DCHECK(IsEnabled(NEON)); |
4691 // Qd = vcgt(Qn, Qm) SIMD integer compare greater than. | 4683 // Qd = vcgt(Qn, Qm) SIMD integer compare greater than. |
4692 // Instruction details available in ARM DDI 0406C.b, A8-852. | 4684 // Instruction details available in ARM DDI 0406C.b, A8-852. |
4693 emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2)); | 4685 emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2)); |
4694 } | 4686 } |
4695 | 4687 |
4696 void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1, | 4688 void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1, |
4697 const QwNeonRegister src2, int bytes) { | 4689 QwNeonRegister src2, int bytes) { |
4698 DCHECK(IsEnabled(NEON)); | 4690 DCHECK(IsEnabled(NEON)); |
4699 // Qd = vext(Qn, Qm) SIMD byte extract. | 4691 // Qd = vext(Qn, Qm) SIMD byte extract. |
4700 // Instruction details available in ARM DDI 0406C.b, A8-890. | 4692 // Instruction details available in ARM DDI 0406C.b, A8-890. |
4701 int vd, d; | 4693 int vd, d; |
4702 dst.split_code(&vd, &d); | 4694 dst.split_code(&vd, &d); |
4703 int vn, n; | 4695 int vn, n; |
4704 src1.split_code(&vn, &n); | 4696 src1.split_code(&vn, &n); |
4705 int vm, m; | 4697 int vm, m; |
4706 src2.split_code(&vm, &m); | 4698 src2.split_code(&vm, &m); |
4707 DCHECK_GT(16, bytes); | 4699 DCHECK_GT(16, bytes); |
4708 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | | 4700 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | |
4709 n * B7 | B6 | m * B5 | vm); | 4701 n * B7 | B6 | m * B5 | vm); |
4710 } | 4702 } |
4711 | 4703 |
4712 void Assembler::vzip(NeonSize size, QwNeonRegister dst, | 4704 enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN }; |
4713 const QwNeonRegister src) { | 4705 |
4714 DCHECK(IsEnabled(NEON)); | 4706 static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size, |
4715 // Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave). | 4707 QwNeonRegister dst, QwNeonRegister src) { |
4716 // Instruction details available in ARM DDI 0406C.b, A8-1102. | 4708 int op_encoding = 0; |
| 4709 switch (op) { |
| 4710 case VZIP: |
| 4711 op_encoding = 0x2 * B16 | 0x3 * B7; |
| 4712 break; |
| 4713 case VUZP: |
| 4714 op_encoding = 0x2 * B16 | 0x2 * B7; |
| 4715 break; |
| 4716 case VREV16: |
| 4717 op_encoding = 0x2 * B7; |
| 4718 break; |
| 4719 case VREV32: |
| 4720 op_encoding = 0x1 * B7; |
| 4721 break; |
| 4722 case VREV64: |
| 4723 // op_encoding is 0; |
| 4724 break; |
| 4725 case VTRN: |
| 4726 op_encoding = 0x2 * B16 | B7; |
| 4727 break; |
| 4728 default: |
| 4729 UNREACHABLE(); |
| 4730 break; |
| 4731 } |
4717 int vd, d; | 4732 int vd, d; |
4718 dst.split_code(&vd, &d); | 4733 dst.split_code(&vd, &d); |
4719 int vm, m; | 4734 int vm, m; |
4720 src.split_code(&vm, &m); | 4735 src.split_code(&vm, &m); |
4721 int sz = static_cast<int>(size); | 4736 int sz = static_cast<int>(size); |
4722 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | 2 * B16 | vd * B12 | | 4737 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | B6 | |
4723 0x3 * B7 | B6 | m * B5 | vm); | 4738 m * B5 | vm | op_encoding; |
4724 } | 4739 } |
4725 | 4740 |
4726 static Instr EncodeNeonVREV(NeonSize op_size, NeonSize size, | 4741 void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { |
4727 const QwNeonRegister dst, | 4742 DCHECK(IsEnabled(NEON)); |
4728 const QwNeonRegister src) { | 4743 // vzip.<size>(Qn, Qm) SIMD zip (interleave), in place on both operands. |
| 4744 // Instruction details available in ARM DDI 0406C.b, A8-1102. |
| 4745 emit(EncodeNeonSizedOp(VZIP, size, src1, src2)); |
| 4746 } |
| 4747 |
| 4748 void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { |
| 4749 DCHECK(IsEnabled(NEON)); |
| 4750 // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave), in place on both operands. |
| 4751 // Instruction details available in ARM DDI 0406C.b, A8-1100. |
| 4752 emit(EncodeNeonSizedOp(VUZP, size, src1, src2)); |
| 4753 } |
| 4754 |
| 4755 void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
| 4756 DCHECK(IsEnabled(NEON)); |
4729 // Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse. | 4757 // Qd = vrev16.<size>(Qm) SIMD element reverse. |
4730 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 4758 // Instruction details available in ARM DDI 0406C.b, A8-1028. |
4731 DCHECK_GT(op_size, static_cast<int>(size)); | 4759 emit(EncodeNeonSizedOp(VREV16, size, dst, src)); |
4732 int vd, d; | |
4733 dst.split_code(&vd, &d); | |
4734 int vm, m; | |
4735 src.split_code(&vm, &m); | |
4736 int sz = static_cast<int>(size); | |
4737 int op = static_cast<int>(Neon64) - static_cast<int>(op_size); | |
4738 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | op * B7 | | |
4739 B6 | m * B5 | vm; | |
4740 } | 4760 } |
4741 | 4761 |
4742 void Assembler::vrev16(NeonSize size, const QwNeonRegister dst, | 4762 void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4743 const QwNeonRegister src) { | |
4744 DCHECK(IsEnabled(NEON)); | 4763 DCHECK(IsEnabled(NEON)); |
4745 emit(EncodeNeonVREV(Neon16, size, dst, src)); | 4764 // Qd = vrev32.<size>(Qm) SIMD element reverse. |
| 4765 // Instruction details available in ARM DDI 0406C.b, A8-1028. |
| 4766 emit(EncodeNeonSizedOp(VREV32, size, dst, src)); |
4746 } | 4767 } |
4747 | 4768 |
4748 void Assembler::vrev32(NeonSize size, const QwNeonRegister dst, | 4769 void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4749 const QwNeonRegister src) { | |
4750 DCHECK(IsEnabled(NEON)); | 4770 DCHECK(IsEnabled(NEON)); |
4751 emit(EncodeNeonVREV(Neon32, size, dst, src)); | 4771 // Qd = vrev64.<size>(Qm) SIMD element reverse. |
| 4772 // Instruction details available in ARM DDI 0406C.b, A8-1028. |
| 4773 emit(EncodeNeonSizedOp(VREV64, size, dst, src)); |
4752 } | 4774 } |
4753 | 4775 |
4754 void Assembler::vrev64(NeonSize size, const QwNeonRegister dst, | 4776 void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { |
4755 const QwNeonRegister src) { | |
4756 DCHECK(IsEnabled(NEON)); | 4777 DCHECK(IsEnabled(NEON)); |
4757 emit(EncodeNeonVREV(Neon64, size, dst, src)); | 4778 // vtrn.<size>(Qn, Qm) SIMD element transpose, in place on both operands. |
| 4779 // Instruction details available in ARM DDI 0406C.b, A8-1096. |
| 4780 emit(EncodeNeonSizedOp(VTRN, size, src1, src2)); |
4758 } | 4781 } |
4759 | 4782 |
4760 // Encode NEON vtbl / vtbx instruction. | 4783 // Encode NEON vtbl / vtbx instruction. |
4761 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, | 4784 static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list, |
4762 const DwVfpRegister index, bool vtbx) { | 4785 DwVfpRegister index, bool vtbx) { |
4763 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. | 4786 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
4764 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4787 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
4765 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. | 4788 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
4766 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4789 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
4767 int vd, d; | 4790 int vd, d; |
4768 dst.split_code(&vd, &d); | 4791 dst.split_code(&vd, &d); |
4769 int vn, n; | 4792 int vn, n; |
4770 list.base().split_code(&vn, &n); | 4793 list.base().split_code(&vn, &n); |
4771 int vm, m; | 4794 int vm, m; |
4772 index.split_code(&vm, &m); | 4795 index.split_code(&vm, &m); |
4773 int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1. | 4796 int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1. |
4774 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | | 4797 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
4775 list.length() * B8 | n * B7 | op * B6 | m * B5 | vm; | 4798 list.length() * B8 | n * B7 | op * B6 | m * B5 | vm; |
4776 } | 4799 } |
4777 | 4800 |
4778 void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list, | 4801 void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list, |
4779 const DwVfpRegister index) { | 4802 DwVfpRegister index) { |
4780 DCHECK(IsEnabled(NEON)); | 4803 DCHECK(IsEnabled(NEON)); |
4781 emit(EncodeNeonVTB(dst, list, index, false)); | 4804 emit(EncodeNeonVTB(dst, list, index, false)); |
4782 } | 4805 } |
4783 | 4806 |
4784 void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list, | 4807 void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list, |
4785 const DwVfpRegister index) { | 4808 DwVfpRegister index) { |
4786 DCHECK(IsEnabled(NEON)); | 4809 DCHECK(IsEnabled(NEON)); |
4787 emit(EncodeNeonVTB(dst, list, index, true)); | 4810 emit(EncodeNeonVTB(dst, list, index, true)); |
4788 } | 4811 } |
4789 | 4812 |
4790 // Pseudo instructions. | 4813 // Pseudo instructions. |
4791 void Assembler::nop(int type) { | 4814 void Assembler::nop(int type) { |
4792 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes | 4815 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |
4793 // some of the CPU's pipeline and has to issue. Older ARM chips simply used | 4816 // some of the CPU's pipeline and has to issue. Older ARM chips simply used |
4794 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. | 4817 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. |
4795 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode | 4818 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode |
(...skipping 538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5334 DCHECK(is_uint12(offset)); | 5357 DCHECK(is_uint12(offset)); |
5335 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); | 5358 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); |
5336 } | 5359 } |
5337 } | 5360 } |
5338 | 5361 |
5339 | 5362 |
5340 } // namespace internal | 5363 } // namespace internal |
5341 } // namespace v8 | 5364 } // namespace v8 |
5342 | 5365 |
5343 #endif // V8_TARGET_ARCH_ARM | 5366 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |