Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 1994-2006 Sun Microsystems Inc. | 1 // Copyright (c) 1994-2006 Sun Microsystems Inc. |
| 2 // All Rights Reserved. | 2 // All Rights Reserved. |
| 3 // | 3 // |
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions | 5 // modification, are permitted provided that the following conditions |
| 6 // are met: | 6 // are met: |
| 7 // | 7 // |
| 8 // - Redistributions of source code must retain the above copyright notice, | 8 // - Redistributions of source code must retain the above copyright notice, |
| 9 // this list of conditions and the following disclaimer. | 9 // this list of conditions and the following disclaimer. |
| 10 // | 10 // |
| (...skipping 3885 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3896 // Instruction details available in ARM DDI 0406C.b, A8.8.320. | 3896 // Instruction details available in ARM DDI 0406C.b, A8.8.320. |
| 3897 // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | | 3897 // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | |
| 3898 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) | 3898 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
| 3899 DCHECK(IsEnabled(NEON)); | 3899 DCHECK(IsEnabled(NEON)); |
| 3900 int vd, d; | 3900 int vd, d; |
| 3901 dst.base().split_code(&vd, &d); | 3901 dst.base().split_code(&vd, &d); |
| 3902 emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | | 3902 emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | |
| 3903 dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); | 3903 dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); |
| 3904 } | 3904 } |
| 3905 | 3905 |
| 3906 | 3906 void Assembler::vst1(NeonSize size, const NeonListOperand& src, |
| 3907 void Assembler::vst1(NeonSize size, | |
| 3908 const NeonListOperand& src, | |
| 3909 const NeonMemOperand& dst) { | 3907 const NeonMemOperand& dst) { |
| 3910 // Instruction details available in ARM DDI 0406C.b, A8.8.404. | 3908 // Instruction details available in ARM DDI 0406C.b, A8.8.404. |
| 3911 // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | | 3909 // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | |
| 3912 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) | 3910 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
| 3913 DCHECK(IsEnabled(NEON)); | 3911 DCHECK(IsEnabled(NEON)); |
| 3914 int vd, d; | 3912 int vd, d; |
| 3915 src.base().split_code(&vd, &d); | 3913 src.base().split_code(&vd, &d); |
| 3916 emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | | 3914 emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | |
| 3917 size*B6 | dst.align()*B4 | dst.rm().code()); | 3915 size*B6 | dst.align()*B4 | dst.rm().code()); |
| 3918 } | 3916 } |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3977 // vmov Arm scalar to core register. | 3975 // vmov Arm scalar to core register. |
| 3978 DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); | 3976 DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); |
| 3979 int vn, n; | 3977 int vn, n; |
| 3980 src.split_code(&vn, &n); | 3978 src.split_code(&vn, &n); |
| 3981 int opc1_opc2 = EncodeScalar(dt, index); | 3979 int opc1_opc2 = EncodeScalar(dt, index); |
| 3982 int u = NeonU(dt); | 3980 int u = NeonU(dt); |
| 3983 emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | | 3981 emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | |
| 3984 n * B7 | B4 | opc1_opc2); | 3982 n * B7 | B4 | opc1_opc2); |
| 3985 } | 3983 } |
| 3986 | 3984 |
| 3987 void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { | 3985 void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) { |
| 3988 // Instruction details available in ARM DDI 0406C.b, A8-938. | 3986 // Instruction details available in ARM DDI 0406C.b, A8-938. |
| 3989 // vmov is encoded as vorr. | 3987 // vmov is encoded as vorr. |
| 3990 vorr(dst, src, src); | 3988 vorr(dst, src, src); |
| 3991 } | 3989 } |
| 3992 | 3990 |
| 3993 void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) { | 3991 void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) { |
| 3994 DCHECK(IsEnabled(NEON)); | |
| 3995 // Instruction details available in ARM DDI 0406C.b, A8-966. | |
| 3996 DCHECK(VfpRegisterIsAvailable(dst)); | |
| 3997 DCHECK(VfpRegisterIsAvailable(src)); | |
| 3998 int vd, d; | |
| 3999 dst.split_code(&vd, &d); | |
| 4000 int vm, m; | |
| 4001 src.split_code(&vm, &m); | |
| 4002 emit(0x1E7U * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm); | |
| 4003 } | |
| 4004 | |
| 4005 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { | |
| 4006 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
| 4007 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | | |
| 4008 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) | |
| 4009 DCHECK(IsEnabled(NEON)); | |
| 4010 int vd, d; | |
| 4011 dst.split_code(&vd, &d); | |
| 4012 int vm, m; | |
| 4013 src.split_code(&vm, &m); | |
| 4014 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm); | |
| 4015 } | |
| 4016 | |
| 4017 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { | |
| 4018 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
| 4019 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | | |
| 4020 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) | |
| 4021 DCHECK(IsEnabled(NEON)); | |
| 4022 int vd, d; | |
| 4023 dst.split_code(&vd, &d); | |
| 4024 int vm, m; | |
| 4025 src.split_code(&vm, &m); | |
| 4026 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | B6 | m * B5 | | |
| 4027 vm); | |
| 4028 } | |
| 4029 | |
| 4030 void Assembler::vdup(NeonSize size, const QwNeonRegister dst, | |
| 4031 const Register src) { | |
| 4032 DCHECK(IsEnabled(NEON)); | 3992 DCHECK(IsEnabled(NEON)); |
| 4033 // Instruction details available in ARM DDI 0406C.b, A8-886. | 3993 // Instruction details available in ARM DDI 0406C.b, A8-886. |
| 4034 int B = 0, E = 0; | 3994 int B = 0, E = 0; |
| 4035 switch (size) { | 3995 switch (size) { |
| 4036 case Neon8: | 3996 case Neon8: |
| 4037 B = 1; | 3997 B = 1; |
| 4038 break; | 3998 break; |
| 4039 case Neon16: | 3999 case Neon16: |
| 4040 E = 1; | 4000 E = 1; |
| 4041 break; | 4001 break; |
| 4042 case Neon32: | 4002 case Neon32: |
| 4043 break; | 4003 break; |
| 4044 default: | 4004 default: |
| 4045 UNREACHABLE(); | 4005 UNREACHABLE(); |
| 4046 break; | 4006 break; |
| 4047 } | 4007 } |
| 4048 int vd, d; | 4008 int vd, d; |
| 4049 dst.split_code(&vd, &d); | 4009 dst.split_code(&vd, &d); |
| 4050 | 4010 |
| 4051 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | | 4011 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | |
| 4052 0xB * B8 | d * B7 | E * B5 | B4); | 4012 0xB * B8 | d * B7 | E * B5 | B4); |
| 4053 } | 4013 } |
| 4054 | 4014 |
| 4055 void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) { | 4015 void Assembler::vdup(QwNeonRegister dst, SwVfpRegister src) { |
| 4056 DCHECK(IsEnabled(NEON)); | 4016 DCHECK(IsEnabled(NEON)); |
| 4057 // Instruction details available in ARM DDI 0406C.b, A8-884. | 4017 // Instruction details available in ARM DDI 0406C.b, A8-884. |
| 4058 int index = src.code() & 1; | 4018 int index = src.code() & 1; |
| 4059 int d_reg = src.code() / 2; | 4019 int d_reg = src.code() / 2; |
| 4060 int imm4 = 4 | index << 3; // esize = 32, index in bit 3. | 4020 int imm4 = 4 | index << 3; // esize = 32, index in bit 3. |
| 4061 int vd, d; | 4021 int vd, d; |
| 4062 dst.split_code(&vd, &d); | 4022 dst.split_code(&vd, &d); |
| 4063 int vm, m; | 4023 int vm, m; |
| 4064 DwVfpRegister::from_code(d_reg).split_code(&vm, &m); | 4024 DwVfpRegister::from_code(d_reg).split_code(&vm, &m); |
| 4065 | 4025 |
| 4066 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | | 4026 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | |
| 4067 B6 | m * B5 | vm); | 4027 B6 | m * B5 | vm); |
| 4068 } | 4028 } |
| 4069 | 4029 |
| 4070 // Encode NEON vcvt.src_type.dst_type instruction. | 4030 // Encode NEON vcvt.src_type.dst_type instruction. |
| 4071 static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst, | 4031 static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst, |
| 4072 const VFPType src_type, const QwNeonRegister src) { | 4032 VFPType src_type, QwNeonRegister src) { |
| 4073 DCHECK(src_type != dst_type); | 4033 DCHECK(src_type != dst_type); |
| 4074 DCHECK(src_type == F32 || dst_type == F32); | 4034 DCHECK(src_type == F32 || dst_type == F32); |
| 4075 // Instruction details available in ARM DDI 0406C.b, A8.8.868. | 4035 // Instruction details available in ARM DDI 0406C.b, A8.8.868. |
| 4076 int vd, d; | 4036 int vd, d; |
| 4077 dst.split_code(&vd, &d); | 4037 dst.split_code(&vd, &d); |
| 4078 int vm, m; | 4038 int vm, m; |
| 4079 src.split_code(&vm, &m); | 4039 src.split_code(&vm, &m); |
| 4080 | 4040 |
| 4081 int op = 0; | 4041 int op = 0; |
| 4082 if (src_type == F32) { | 4042 if (src_type == F32) { |
| 4083 DCHECK(dst_type == S32 || dst_type == U32); | 4043 DCHECK(dst_type == S32 || dst_type == U32); |
| 4084 op = dst_type == U32 ? 3 : 2; | 4044 op = dst_type == U32 ? 3 : 2; |
| 4085 } else { | 4045 } else { |
| 4086 DCHECK(src_type == S32 || src_type == U32); | 4046 DCHECK(src_type == S32 || src_type == U32); |
| 4087 op = src_type == U32 ? 1 : 0; | 4047 op = src_type == U32 ? 1 : 0; |
| 4088 } | 4048 } |
| 4089 | 4049 |
| 4090 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | | 4050 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | |
| 4091 B6 | m * B5 | vm; | 4051 B6 | m * B5 | vm; |
| 4092 } | 4052 } |
| 4093 | 4053 |
| 4094 void Assembler::vcvt_f32_s32(const QwNeonRegister dst, | 4054 void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) { |
| 4095 const QwNeonRegister src) { | |
| 4096 DCHECK(IsEnabled(NEON)); | 4055 DCHECK(IsEnabled(NEON)); |
| 4097 DCHECK(VfpRegisterIsAvailable(dst)); | 4056 DCHECK(VfpRegisterIsAvailable(dst)); |
| 4098 DCHECK(VfpRegisterIsAvailable(src)); | 4057 DCHECK(VfpRegisterIsAvailable(src)); |
| 4099 emit(EncodeNeonVCVT(F32, dst, S32, src)); | 4058 emit(EncodeNeonVCVT(F32, dst, S32, src)); |
| 4100 } | 4059 } |
| 4101 | 4060 |
| 4102 void Assembler::vcvt_f32_u32(const QwNeonRegister dst, | 4061 void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) { |
| 4103 const QwNeonRegister src) { | |
| 4104 DCHECK(IsEnabled(NEON)); | 4062 DCHECK(IsEnabled(NEON)); |
| 4105 DCHECK(VfpRegisterIsAvailable(dst)); | 4063 DCHECK(VfpRegisterIsAvailable(dst)); |
| 4106 DCHECK(VfpRegisterIsAvailable(src)); | 4064 DCHECK(VfpRegisterIsAvailable(src)); |
| 4107 emit(EncodeNeonVCVT(F32, dst, U32, src)); | 4065 emit(EncodeNeonVCVT(F32, dst, U32, src)); |
| 4108 } | 4066 } |
| 4109 | 4067 |
| 4110 void Assembler::vcvt_s32_f32(const QwNeonRegister dst, | 4068 void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) { |
| 4111 const QwNeonRegister src) { | |
| 4112 DCHECK(IsEnabled(NEON)); | 4069 DCHECK(IsEnabled(NEON)); |
| 4113 DCHECK(VfpRegisterIsAvailable(dst)); | 4070 DCHECK(VfpRegisterIsAvailable(dst)); |
| 4114 DCHECK(VfpRegisterIsAvailable(src)); | 4071 DCHECK(VfpRegisterIsAvailable(src)); |
| 4115 emit(EncodeNeonVCVT(S32, dst, F32, src)); | 4072 emit(EncodeNeonVCVT(S32, dst, F32, src)); |
| 4116 } | 4073 } |
| 4117 | 4074 |
| 4118 void Assembler::vcvt_u32_f32(const QwNeonRegister dst, | 4075 void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) { |
| 4119 const QwNeonRegister src) { | |
| 4120 DCHECK(IsEnabled(NEON)); | 4076 DCHECK(IsEnabled(NEON)); |
| 4121 DCHECK(VfpRegisterIsAvailable(dst)); | 4077 DCHECK(VfpRegisterIsAvailable(dst)); |
| 4122 DCHECK(VfpRegisterIsAvailable(src)); | 4078 DCHECK(VfpRegisterIsAvailable(src)); |
| 4123 emit(EncodeNeonVCVT(U32, dst, F32, src)); | 4079 emit(EncodeNeonVCVT(U32, dst, F32, src)); |
| 4124 } | 4080 } |
| 4125 | 4081 |
| 4126 enum UnaryOp { VABS, VABSF, VNEG, VNEGF }; | 4082 enum NeonRegType { NEON_D, NEON_Q }; |
| 4127 | 4083 |
| 4128 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonSize size, QwNeonRegister dst, | 4084 enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF }; |
| 4129 QwNeonRegister src) { | 4085 |
| 4086 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size, | |
| 4087 int dst_code, int src_code) { | |
| 4130 int op_encoding = 0; | 4088 int op_encoding = 0; |
| 4131 switch (op) { | 4089 switch (op) { |
| 4090 case VMVN: | |
| 4091 DCHECK_EQ(Neon8, size); // size == 0 for vmvn | |
| 4092 op_encoding = B10 | 0x3 * B7; | |
| 4093 break; | |
| 4094 case VSWP: | |
| 4095 DCHECK_EQ(Neon8, size); // size == 0 for vswp | |
| 4096 op_encoding = B17; | |
| 4097 break; | |
| 4132 case VABS: | 4098 case VABS: |
| 4133 op_encoding = 0x6 * B7; | 4099 op_encoding = B16 | 0x6 * B7; |
| 4134 break; | 4100 break; |
| 4135 case VABSF: | 4101 case VABSF: |
| 4136 DCHECK_EQ(Neon32, size); | 4102 DCHECK_EQ(Neon32, size); |
| 4137 op_encoding = 0x6 * B7 | B10; | 4103 op_encoding = B16 | B10 | 0x6 * B7; |
| 4138 break; | 4104 break; |
| 4139 case VNEG: | 4105 case VNEG: |
| 4140 op_encoding = 0x7 * B7; | 4106 op_encoding = B16 | 0x7 * B7; |
| 4141 break; | 4107 break; |
| 4142 case VNEGF: | 4108 case VNEGF: |
| 4143 DCHECK_EQ(Neon32, size); | 4109 DCHECK_EQ(Neon32, size); |
| 4144 op_encoding = 0x7 * B7 | B10; | 4110 op_encoding = B16 | B10 | 0x7 * B7; |
| 4145 break; | 4111 break; |
| 4146 default: | 4112 default: |
| 4147 UNREACHABLE(); | 4113 UNREACHABLE(); |
| 4148 break; | 4114 break; |
| 4149 } | 4115 } |
| 4150 int vd, d; | 4116 if (reg_type == NEON_Q) { |
| 4151 dst.split_code(&vd, &d); | 4117 op_encoding |= B6; |
| 4152 int vm, m; | 4118 dst_code <<= 1; |
| 4153 src.split_code(&vm, &m); | 4119 src_code <<= 1; |
| 4154 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 | B6 | | 4120 } |
| 4155 m * B5 | vm | op_encoding; | 4121 |
| 4122 int vd = dst_code & 0x0F; | |
|
martyn.capewell
2017/03/09 14:03:36
Perhaps there should be some static methods to spl
bbudge
2017/03/09 20:51:05
Good idea. Done.
| |
| 4123 int d = (dst_code & 0x10) >> 4; | |
| 4124 int vm = src_code & 0x0F; | |
| 4125 int m = (src_code & 0x10) >> 4; | |
| 4126 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 | | |
| 4127 vm | op_encoding; | |
| 4156 } | 4128 } |
| 4157 | 4129 |
| 4158 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) { | 4130 void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) { |
| 4131 // Qd = vmvn(Qn, Qm) SIMD bitwise negate. | |
| 4132 // Instruction details available in ARM DDI 0406C.b, A8-966. | |
| 4133 DCHECK(IsEnabled(NEON)); | |
| 4134 emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code())); | |
| 4135 } | |
| 4136 | |
| 4137 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { | |
| 4138 DCHECK(IsEnabled(NEON)); | |
| 4139 // Dd = vswp(Dn, Dm) SIMD d-register swap. | |
| 4140 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
| 4141 DCHECK(IsEnabled(NEON)); | |
| 4142 emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code())); | |
| 4143 } | |
| 4144 | |
| 4145 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { | |
| 4146 // Qd = vswp(Qn, Qm) SIMD q-register swap. | |
| 4147 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
| 4148 DCHECK(IsEnabled(NEON)); | |
| 4149 emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code())); | |
| 4150 } | |
| 4151 | |
| 4152 void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) { | |
| 4159 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value. | 4153 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value. |
| 4160 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | 4154 // Instruction details available in ARM DDI 0406C.b, A8.8.824. |
| 4161 DCHECK(IsEnabled(NEON)); | 4155 DCHECK(IsEnabled(NEON)); |
| 4162 emit(EncodeNeonUnaryOp(VABSF, Neon32, dst, src)); | 4156 emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code())); |
| 4163 } | 4157 } |
| 4164 | 4158 |
| 4165 void Assembler::vabs(NeonSize size, const QwNeonRegister dst, | 4159 void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
| 4166 const QwNeonRegister src) { | |
| 4167 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value. | 4160 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value. |
| 4168 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | 4161 // Instruction details available in ARM DDI 0406C.b, A8.8.824. |
| 4169 DCHECK(IsEnabled(NEON)); | 4162 DCHECK(IsEnabled(NEON)); |
| 4170 emit(EncodeNeonUnaryOp(VABS, size, dst, src)); | 4163 emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code())); |
| 4171 } | 4164 } |
| 4172 | 4165 |
| 4173 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) { | 4166 void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) { |
| 4174 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point negate. | 4167 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point negate. |
| 4175 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | 4168 // Instruction details available in ARM DDI 0406C.b, A8.8.968. |
| 4176 DCHECK(IsEnabled(NEON)); | 4169 DCHECK(IsEnabled(NEON)); |
| 4177 emit(EncodeNeonUnaryOp(VNEGF, Neon32, dst, src)); | 4170 emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code())); |
| 4178 } | 4171 } |
| 4179 | 4172 |
| 4180 void Assembler::vneg(NeonSize size, const QwNeonRegister dst, | 4173 void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
| 4181 const QwNeonRegister src) { | |
| 4182 // Qd = vabs.s<size>(Qn, Qm) SIMD integer negate. | 4174 // Qd = vabs.s<size>(Qn, Qm) SIMD integer negate. |
| 4183 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | 4175 // Instruction details available in ARM DDI 0406C.b, A8.8.968. |
| 4184 DCHECK(IsEnabled(NEON)); | 4176 DCHECK(IsEnabled(NEON)); |
| 4185 emit(EncodeNeonUnaryOp(VNEG, size, dst, src)); | 4177 emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code())); |
| 4186 } | |
| 4187 | |
| 4188 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, | |
| 4189 DwVfpRegister src2) { | |
| 4190 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. | |
| 4191 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | |
| 4192 DCHECK(IsEnabled(NEON)); | |
| 4193 int vd, d; | |
| 4194 dst.split_code(&vd, &d); | |
| 4195 int vn, n; | |
| 4196 src1.split_code(&vn, &n); | |
| 4197 int vm, m; | |
| 4198 src2.split_code(&vm, &m); | |
| 4199 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | m * B5 | | |
| 4200 B4 | vm); | |
| 4201 } | 4178 } |
| 4202 | 4179 |
| 4203 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN }; | 4180 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN }; |
| 4204 | 4181 |
| 4205 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, QwNeonRegister dst, | 4182 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type, |
| 4206 QwNeonRegister src1, | 4183 int dst_code, int src_code1, |
| 4207 QwNeonRegister src2) { | 4184 int src_code2) { |
| 4208 int op_encoding = 0; | 4185 int op_encoding = 0; |
| 4209 switch (op) { | 4186 switch (op) { |
| 4210 case VBIC: | 4187 case VBIC: |
| 4211 op_encoding = 0x1 * B20; | 4188 op_encoding = 0x1 * B20; |
| 4212 break; | 4189 break; |
| 4213 case VBIF: | 4190 case VBIF: |
| 4214 op_encoding = B24 | 0x3 * B20; | 4191 op_encoding = B24 | 0x3 * B20; |
| 4215 break; | 4192 break; |
| 4216 case VBIT: | 4193 case VBIT: |
| 4217 op_encoding = B24 | 0x2 * B20; | 4194 op_encoding = B24 | 0x2 * B20; |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 4228 case VORN: | 4205 case VORN: |
| 4229 op_encoding = 0x3 * B20; | 4206 op_encoding = 0x3 * B20; |
| 4230 break; | 4207 break; |
| 4231 case VAND: | 4208 case VAND: |
| 4232 // op_encoding is 0. | 4209 // op_encoding is 0. |
| 4233 break; | 4210 break; |
| 4234 default: | 4211 default: |
| 4235 UNREACHABLE(); | 4212 UNREACHABLE(); |
| 4236 break; | 4213 break; |
| 4237 } | 4214 } |
| 4238 int vd, d; | 4215 if (reg_type == NEON_Q) { |
| 4239 dst.split_code(&vd, &d); | 4216 op_encoding |= B6; |
| 4240 int vn, n; | 4217 dst_code <<= 1; |
| 4241 src1.split_code(&vn, &n); | 4218 src_code1 <<= 1; |
| 4242 int vm, m; | 4219 src_code2 <<= 1; |
| 4243 src2.split_code(&vm, &m); | 4220 } |
| 4221 | |
| 4222 int vd = dst_code & 0x0F; | |
| 4223 int d = (dst_code & 0x10) >> 4; | |
| 4224 int vn = src_code1 & 0x0F; | |
| 4225 int n = (src_code1 & 0x10) >> 4; | |
| 4226 int vm = src_code2 & 0x0F; | |
| 4227 int m = (src_code2 & 0x10) >> 4; | |
| 4244 return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 | | 4228 return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 | |
| 4245 n * B7 | B6 | m * B5 | B4 | vm; | 4229 n * B7 | m * B5 | B4 | vm; |
| 4246 } | 4230 } |
| 4247 | 4231 |
| 4248 void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1, | 4232 void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1, |
| 4249 QwNeonRegister src2) { | 4233 QwNeonRegister src2) { |
| 4250 // Qd = vand(Qn, Qm) SIMD AND. | 4234 // Qd = vand(Qn, Qm) SIMD AND. |
| 4251 // Instruction details available in ARM DDI 0406C.b, A8.8.836. | 4235 // Instruction details available in ARM DDI 0406C.b, A8.8.836. |
| 4252 DCHECK(IsEnabled(NEON)); | 4236 DCHECK(IsEnabled(NEON)); |
| 4253 emit(EncodeNeonBinaryBitwiseOp(VAND, dst, src1, src2)); | 4237 emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(), |
| 4238 src2.code())); | |
| 4254 } | 4239 } |
| 4255 | 4240 |
| 4256 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, | 4241 void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1, |
| 4257 const QwNeonRegister src2) { | 4242 QwNeonRegister src2) { |
| 4258 DCHECK(IsEnabled(NEON)); | |
| 4259 // Qd = vbsl(Qn, Qm) SIMD bitwise select. | 4243 // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
| 4260 // Instruction details available in ARM DDI 0406C.b, A8-844. | 4244 // Instruction details available in ARM DDI 0406C.b, A8-844. |
| 4261 emit(EncodeNeonBinaryBitwiseOp(VBSL, dst, src1, src2)); | 4245 DCHECK(IsEnabled(NEON)); |
| 4246 emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(), | |
| 4247 src2.code())); | |
| 4248 } | |
| 4249 | |
| 4250 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, | |
| 4251 DwVfpRegister src2) { | |
| 4252 // Dd = veor(Dn, Dm) SIMD exclusive OR. | |
| 4253 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | |
| 4254 DCHECK(IsEnabled(NEON)); | |
| 4255 emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(), | |
| 4256 src2.code())); | |
| 4262 } | 4257 } |
| 4263 | 4258 |
| 4264 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, | 4259 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
| 4265 QwNeonRegister src2) { | 4260 QwNeonRegister src2) { |
| 4266 // Qd = veor(Qn, Qm) SIMD exclusive OR. | 4261 // Qd = veor(Qn, Qm) SIMD exclusive OR. |
| 4267 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | 4262 // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
| 4268 DCHECK(IsEnabled(NEON)); | 4263 DCHECK(IsEnabled(NEON)); |
| 4269 emit(EncodeNeonBinaryBitwiseOp(VEOR, dst, src1, src2)); | 4264 emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(), |
| 4265 src2.code())); | |
| 4270 } | 4266 } |
| 4271 | 4267 |
| 4272 void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1, | 4268 void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1, |
| 4273 QwNeonRegister src2) { | 4269 QwNeonRegister src2) { |
| 4274 // Qd = vorr(Qn, Qm) SIMD OR. | 4270 // Qd = vorr(Qn, Qm) SIMD OR. |
| 4275 // Instruction details available in ARM DDI 0406C.b, A8.8.976. | 4271 // Instruction details available in ARM DDI 0406C.b, A8.8.976. |
| 4276 DCHECK(IsEnabled(NEON)); | 4272 DCHECK(IsEnabled(NEON)); |
| 4277 emit(EncodeNeonBinaryBitwiseOp(VORR, dst, src1, src2)); | 4273 emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(), |
| 4274 src2.code())); | |
| 4278 } | 4275 } |
| 4279 | 4276 |
| 4280 enum FPBinOp { | 4277 enum FPBinOp { |
| 4281 VADDF, | 4278 VADDF, |
| 4282 VSUBF, | 4279 VSUBF, |
| 4283 VMULF, | 4280 VMULF, |
| 4284 VMINF, | 4281 VMINF, |
| 4285 VMAXF, | 4282 VMAXF, |
| 4286 VRECPS, | 4283 VRECPS, |
| 4287 VRSQRTS, | 4284 VRSQRTS, |
| (...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4462 } | 4459 } |
| 4463 | 4460 |
| 4464 void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1, | 4461 void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1, |
| 4465 QwNeonRegister src2) { | 4462 QwNeonRegister src2) { |
| 4466 DCHECK(IsEnabled(NEON)); | 4463 DCHECK(IsEnabled(NEON)); |
| 4467 // Qd = vadd(Qn, Qm) SIMD floating point multiply. | 4464 // Qd = vadd(Qn, Qm) SIMD floating point multiply. |
| 4468 // Instruction details available in ARM DDI 0406C.b, A8-958. | 4465 // Instruction details available in ARM DDI 0406C.b, A8-958. |
| 4469 emit(EncodeNeonBinOp(VMULF, dst, src1, src2)); | 4466 emit(EncodeNeonBinOp(VMULF, dst, src1, src2)); |
| 4470 } | 4467 } |
| 4471 | 4468 |
| 4472 void Assembler::vmul(NeonSize size, QwNeonRegister dst, | 4469 void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, |
| 4473 const QwNeonRegister src1, const QwNeonRegister src2) { | 4470 QwNeonRegister src2) { |
| 4474 DCHECK(IsEnabled(NEON)); | 4471 DCHECK(IsEnabled(NEON)); |
| 4475 // Qd = vadd(Qn, Qm) SIMD integer multiply. | 4472 // Qd = vadd(Qn, Qm) SIMD integer multiply. |
| 4476 // Instruction details available in ARM DDI 0406C.b, A8-960. | 4473 // Instruction details available in ARM DDI 0406C.b, A8-960. |
| 4477 emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2)); | 4474 emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2)); |
| 4478 } | 4475 } |
| 4479 | 4476 |
| 4480 void Assembler::vmin(const QwNeonRegister dst, const QwNeonRegister src1, | 4477 void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1, |
| 4481 const QwNeonRegister src2) { | 4478 QwNeonRegister src2) { |
| 4482 DCHECK(IsEnabled(NEON)); | 4479 DCHECK(IsEnabled(NEON)); |
| 4483 // Qd = vmin(Qn, Qm) SIMD floating point MIN. | 4480 // Qd = vmin(Qn, Qm) SIMD floating point MIN. |
| 4484 // Instruction details available in ARM DDI 0406C.b, A8-928. | 4481 // Instruction details available in ARM DDI 0406C.b, A8-928. |
| 4485 emit(EncodeNeonBinOp(VMINF, dst, src1, src2)); | 4482 emit(EncodeNeonBinOp(VMINF, dst, src1, src2)); |
| 4486 } | 4483 } |
| 4487 | 4484 |
| 4488 void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, | 4485 void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, |
| 4489 QwNeonRegister src2) { | 4486 QwNeonRegister src2) { |
| 4490 DCHECK(IsEnabled(NEON)); | 4487 DCHECK(IsEnabled(NEON)); |
| 4491 // Qd = vmin(Qn, Qm) SIMD integer MIN. | 4488 // Qd = vmin(Qn, Qm) SIMD integer MIN. |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4585 } | 4582 } |
| 4586 | 4583 |
| 4587 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, | 4584 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, |
| 4588 QwNeonRegister src2) { | 4585 QwNeonRegister src2) { |
| 4589 DCHECK(IsEnabled(NEON)); | 4586 DCHECK(IsEnabled(NEON)); |
| 4590 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step. | 4587 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step. |
| 4591 // Instruction details available in ARM DDI 0406C.b, A8-1040. | 4588 // Instruction details available in ARM DDI 0406C.b, A8-1040. |
| 4592 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); | 4589 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); |
| 4593 } | 4590 } |
| 4594 | 4591 |
| 4595 enum PairwiseOp { VPMIN, VPMAX }; | 4592 enum NeonPairwiseOp { VPMIN, VPMAX }; |
| 4596 | 4593 |
| 4597 static Instr EncodeNeonPairwiseOp(PairwiseOp op, NeonDataType dt, | 4594 static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt, |
| 4598 DwVfpRegister dst, DwVfpRegister src1, | 4595 DwVfpRegister dst, DwVfpRegister src1, |
| 4599 DwVfpRegister src2) { | 4596 DwVfpRegister src2) { |
| 4600 int op_encoding = 0; | 4597 int op_encoding = 0; |
| 4601 switch (op) { | 4598 switch (op) { |
| 4602 case VPMIN: | 4599 case VPMIN: |
| 4603 op_encoding = 0xA * B8 | B4; | 4600 op_encoding = 0xA * B8 | B4; |
| 4604 break; | 4601 break; |
| 4605 case VPMAX: | 4602 case VPMAX: |
| 4606 op_encoding = 0xA * B8; | 4603 op_encoding = 0xA * B8; |
| 4607 break; | 4604 break; |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4686 } | 4683 } |
| 4687 | 4684 |
| 4688 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, | 4685 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, |
| 4689 QwNeonRegister src2) { | 4686 QwNeonRegister src2) { |
| 4690 DCHECK(IsEnabled(NEON)); | 4687 DCHECK(IsEnabled(NEON)); |
| 4691 // Qd = vcgt(Qn, Qm) SIMD integer compare greater than. | 4688 // Qd = vcgt(Qn, Qm) SIMD integer compare greater than. |
| 4692 // Instruction details available in ARM DDI 0406C.b, A8-852. | 4689 // Instruction details available in ARM DDI 0406C.b, A8-852. |
| 4693 emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2)); | 4690 emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2)); |
| 4694 } | 4691 } |
| 4695 | 4692 |
| 4696 void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1, | 4693 void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1, |
| 4697 const QwNeonRegister src2, int bytes) { | 4694 QwNeonRegister src2, int bytes) { |
| 4698 DCHECK(IsEnabled(NEON)); | 4695 DCHECK(IsEnabled(NEON)); |
| 4699 // Qd = vext(Qn, Qm) SIMD byte extract. | 4696 // Qd = vext(Qn, Qm) SIMD byte extract. |
| 4700 // Instruction details available in ARM DDI 0406C.b, A8-890. | 4697 // Instruction details available in ARM DDI 0406C.b, A8-890. |
| 4701 int vd, d; | 4698 int vd, d; |
| 4702 dst.split_code(&vd, &d); | 4699 dst.split_code(&vd, &d); |
| 4703 int vn, n; | 4700 int vn, n; |
| 4704 src1.split_code(&vn, &n); | 4701 src1.split_code(&vn, &n); |
| 4705 int vm, m; | 4702 int vm, m; |
| 4706 src2.split_code(&vm, &m); | 4703 src2.split_code(&vm, &m); |
| 4707 DCHECK_GT(16, bytes); | 4704 DCHECK_GT(16, bytes); |
| 4708 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | | 4705 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | |
| 4709 n * B7 | B6 | m * B5 | vm); | 4706 n * B7 | B6 | m * B5 | vm); |
| 4710 } | 4707 } |
| 4711 | 4708 |
| 4712 void Assembler::vzip(NeonSize size, QwNeonRegister dst, | 4709 enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN }; |
| 4713 const QwNeonRegister src) { | 4710 |
| 4714 DCHECK(IsEnabled(NEON)); | 4711 static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size, |
| 4715 // Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave). | 4712 QwNeonRegister dst, QwNeonRegister src) { |
| 4716 // Instruction details available in ARM DDI 0406C.b, A8-1102. | 4713 int op_encoding = 0; |
| 4714 switch (op) { | |
| 4715 case VZIP: | |
| 4716 op_encoding = 0x2 * B16 | B8 | B7; | |
|
martyn.capewell
2017/03/09 14:03:36
Some of these seem inconsistently expressed. For e
bbudge
2017/03/09 20:51:05
Done.
| |
| 4717 break; | |
| 4718 case VUZP: | |
| 4719 op_encoding = 0x2 * B16 | B8; | |
| 4720 break; | |
| 4721 case VREV16: | |
| 4722 op_encoding = 0x2 * B7; | |
| 4723 break; | |
| 4724 case VREV32: | |
| 4725 op_encoding = 0x1 * B7; | |
| 4726 break; | |
| 4727 case VREV64: | |
| 4728 // op_encoding is 0; | |
| 4729 break; | |
| 4730 case VTRN: | |
| 4731 op_encoding = 0x2 * B16 | B7; | |
| 4732 break; | |
| 4733 default: | |
| 4734 UNREACHABLE(); | |
| 4735 break; | |
| 4736 } | |
| 4717 int vd, d; | 4737 int vd, d; |
| 4718 dst.split_code(&vd, &d); | 4738 dst.split_code(&vd, &d); |
| 4719 int vm, m; | 4739 int vm, m; |
| 4720 src.split_code(&vm, &m); | 4740 src.split_code(&vm, &m); |
| 4721 int sz = static_cast<int>(size); | 4741 int sz = static_cast<int>(size); |
| 4722 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | 2 * B16 | vd * B12 | | 4742 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | B6 | |
| 4723 0x3 * B7 | B6 | m * B5 | vm); | 4743 m * B5 | vm | op_encoding; |
| 4724 } | 4744 } |
| 4725 | 4745 |
| 4726 static Instr EncodeNeonVREV(NeonSize op_size, NeonSize size, | 4746 void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { |
| 4727 const QwNeonRegister dst, | 4747 DCHECK(IsEnabled(NEON)); |
| 4728 const QwNeonRegister src) { | 4748 // Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave). |
| 4749 // Instruction details available in ARM DDI 0406C.b, A8-1102. | |
| 4750 emit(EncodeNeonSizedOp(VZIP, size, src1, src2)); | |
| 4751 } | |
| 4752 | |
| 4753 void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | |
| 4754 DCHECK(IsEnabled(NEON)); | |
| 4755 // Qd = vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave). | |
| 4756 // Instruction details available in ARM DDI 0406C.b, A8-1100. | |
| 4757 emit(EncodeNeonSizedOp(VUZP, size, src1, src2)); | |
| 4758 } | |
| 4759 | |
| 4760 void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | |
| 4761 DCHECK(IsEnabled(NEON)); | |
| 4729 // Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse. | 4762 // Qd = vrev16.<size>(Qm) SIMD element reverse. |
| 4730 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 4763 // Instruction details available in ARM DDI 0406C.b, A8-1028. |
| 4731 DCHECK_GT(op_size, static_cast<int>(size)); | 4764 emit(EncodeNeonSizedOp(VREV16, size, dst, src)); |
| 4732 int vd, d; | |
| 4733 dst.split_code(&vd, &d); | |
| 4734 int vm, m; | |
| 4735 src.split_code(&vm, &m); | |
| 4736 int sz = static_cast<int>(size); | |
| 4737 int op = static_cast<int>(Neon64) - static_cast<int>(op_size); | |
| 4738 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | op * B7 | | |
| 4739 B6 | m * B5 | vm; | |
| 4740 } | 4765 } |
| 4741 | 4766 |
| 4742 void Assembler::vrev16(NeonSize size, const QwNeonRegister dst, | 4767 void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
| 4743 const QwNeonRegister src) { | |
| 4744 DCHECK(IsEnabled(NEON)); | 4768 DCHECK(IsEnabled(NEON)); |
| 4745 emit(EncodeNeonVREV(Neon16, size, dst, src)); | 4769 // Qd = vrev32.<size>(Qm) SIMD element reverse. |
| 4770 // Instruction details available in ARM DDI 0406C.b, A8-1028. | |
| 4771 emit(EncodeNeonSizedOp(VREV32, size, dst, src)); | |
| 4746 } | 4772 } |
| 4747 | 4773 |
| 4748 void Assembler::vrev32(NeonSize size, const QwNeonRegister dst, | 4774 void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
| 4749 const QwNeonRegister src) { | |
| 4750 DCHECK(IsEnabled(NEON)); | 4775 DCHECK(IsEnabled(NEON)); |
| 4751 emit(EncodeNeonVREV(Neon32, size, dst, src)); | 4776 // Qd = vrev64.<size>(Qm) SIMD element reverse. |
| 4777 // Instruction details available in ARM DDI 0406C.b, A8-1028. | |
| 4778 emit(EncodeNeonSizedOp(VREV64, size, dst, src)); | |
| 4752 } | 4779 } |
| 4753 | 4780 |
| 4754 void Assembler::vrev64(NeonSize size, const QwNeonRegister dst, | 4781 void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { |
| 4755 const QwNeonRegister src) { | |
| 4756 DCHECK(IsEnabled(NEON)); | 4782 DCHECK(IsEnabled(NEON)); |
| 4757 emit(EncodeNeonVREV(Neon64, size, dst, src)); | 4783 // vtrn.<size>(Qn, Qm) SIMD element transpose. |
| 4784 // Instruction details available in ARM DDI 0406C.b, A8-1096. | |
| 4785 emit(EncodeNeonSizedOp(VTRN, size, src1, src2)); | |
| 4758 } | 4786 } |
| 4759 | 4787 |
| 4760 // Encode NEON vtbl / vtbx instruction. | 4788 // Encode NEON vtbl / vtbx instruction. |
| 4761 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, | 4789 static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list, |
| 4762 const DwVfpRegister index, bool vtbx) { | 4790 DwVfpRegister index, bool vtbx) { |
| 4763 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. | 4791 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
| 4764 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4792 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
| 4765 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. | 4793 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
| 4766 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4794 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
| 4767 int vd, d; | 4795 int vd, d; |
| 4768 dst.split_code(&vd, &d); | 4796 dst.split_code(&vd, &d); |
| 4769 int vn, n; | 4797 int vn, n; |
| 4770 list.base().split_code(&vn, &n); | 4798 list.base().split_code(&vn, &n); |
| 4771 int vm, m; | 4799 int vm, m; |
| 4772 index.split_code(&vm, &m); | 4800 index.split_code(&vm, &m); |
| 4773 int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1. | 4801 int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1. |
| 4774 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | | 4802 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
| 4775 list.length() * B8 | n * B7 | op * B6 | m * B5 | vm; | 4803 list.length() * B8 | n * B7 | op * B6 | m * B5 | vm; |
| 4776 } | 4804 } |
| 4777 | 4805 |
| 4778 void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list, | 4806 void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list, |
| 4779 const DwVfpRegister index) { | 4807 DwVfpRegister index) { |
| 4780 DCHECK(IsEnabled(NEON)); | 4808 DCHECK(IsEnabled(NEON)); |
| 4781 emit(EncodeNeonVTB(dst, list, index, false)); | 4809 emit(EncodeNeonVTB(dst, list, index, false)); |
| 4782 } | 4810 } |
| 4783 | 4811 |
| 4784 void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list, | 4812 void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list, |
| 4785 const DwVfpRegister index) { | 4813 DwVfpRegister index) { |
| 4786 DCHECK(IsEnabled(NEON)); | 4814 DCHECK(IsEnabled(NEON)); |
| 4787 emit(EncodeNeonVTB(dst, list, index, true)); | 4815 emit(EncodeNeonVTB(dst, list, index, true)); |
| 4788 } | 4816 } |
| 4789 | 4817 |
| 4790 // Pseudo instructions. | 4818 // Pseudo instructions. |
| 4791 void Assembler::nop(int type) { | 4819 void Assembler::nop(int type) { |
| 4792 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes | 4820 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |
| 4793 // some of the CPU's pipeline and has to issue. Older ARM chips simply used | 4821 // some of the CPU's pipeline and has to issue. Older ARM chips simply used |
| 4794 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. | 4822 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. |
| 4795 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode | 4823 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode |
| (...skipping 538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5334 DCHECK(is_uint12(offset)); | 5362 DCHECK(is_uint12(offset)); |
| 5335 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); | 5363 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); |
| 5336 } | 5364 } |
| 5337 } | 5365 } |
| 5338 | 5366 |
| 5339 | 5367 |
| 5340 } // namespace internal | 5368 } // namespace internal |
| 5341 } // namespace v8 | 5369 } // namespace v8 |
| 5342 | 5370 |
| 5343 #endif // V8_TARGET_ARCH_ARM | 5371 #endif // V8_TARGET_ARCH_ARM |
| OLD | NEW |