Chromium Code Reviews| Index: src/arm/simulator-arm.cc |
| diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
| index 5b0c7d603eec3f47d852eda7503a38c1b7ee8f20..7054dd4045d4aed5cf13e661efa797399b3bacec 100644 |
| --- a/src/arm/simulator-arm.cc |
| +++ b/src/arm/simulator-arm.cc |
| @@ -3067,6 +3067,7 @@ void Simulator::DecodeType7(Instruction* instr) { |
| // Dd = vsqrt(Dm) |
| // Sd = vsqrt(Sm) |
| // vmrs |
| +// vdup.size Qd, Rt. |
| void Simulator::DecodeTypeVFP(Instruction* instr) { |
| DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) ); |
| DCHECK(instr->Bits(11, 9) == 0x5); |
| @@ -3277,24 +3278,116 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { |
| if ((instr->VCValue() == 0x0) && |
| (instr->VAValue() == 0x0)) { |
| DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr); |
| - } else if ((instr->VLValue() == 0x0) && |
| - (instr->VCValue() == 0x1) && |
| - (instr->Bit(23) == 0x0)) { |
| - // vmov (ARM core register to scalar) |
| - int vd = instr->Bits(19, 16) | (instr->Bit(7) << 4); |
| - uint32_t data[2]; |
| - get_d_register(vd, data); |
| - data[instr->Bit(21)] = get_register(instr->RtValue()); |
| - set_d_register(vd, data); |
| - } else if ((instr->VLValue() == 0x1) && |
| - (instr->VCValue() == 0x1) && |
| - (instr->Bit(23) == 0x0)) { |
| + } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1)) { |
| + if (instr->Bit(23) == 0) { |
| + // vmov (ARM core register to scalar) |
| + int vd = instr->VFPNRegValue(kDoublePrecision); |
| + int rt = instr->RtValue(); |
| + int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); |
| + if ((opc1_opc2 & 0xb) == 0) { |
| + // NeonS32/NeonU32 |
| + uint32_t data[2]; |
| + get_d_register(vd, data); |
| + data[instr->Bit(21)] = get_register(rt); |
| + set_d_register(vd, data); |
| + } else { |
| + uint64_t data; |
| + get_d_register(vd, &data); |
| + uint64_t rt_value = get_register(rt); |
| + if ((opc1_opc2 & 0x8) != 0) { |
| + // NeonS8 / NeonU8 |
| + int i = opc1_opc2 & 0x7; |
| + int shift = i * kBitsPerByte; |
| + const uint64_t mask = 0xFF; |
| + data &= ~(mask << shift); |
| + data |= (rt_value & mask) << shift; |
| + set_d_register(vd, &data); |
| + } else if ((opc1_opc2 & 0x1) != 0) { |
| + // NeonS16 / NeonU16 |
| + int i = (opc1_opc2 >> 1) & 0x3; |
| + int shift = i * kBitsPerByte * kShortSize; |
| + const uint64_t mask = 0xFFFF; |
| + data &= ~(mask << shift); |
| + data |= (rt_value & mask) << shift; |
| + set_d_register(vd, &data); |
| + } else { |
| + UNREACHABLE(); // Not used by V8. |
| + } |
| + } |
| + } else { |
| + // vdup.size Qd, Rt. |
| + NeonSize size = Neon32; |
| + if (instr->Bit(5) != 0) |
| + size = Neon16; |
| + else if (instr->Bit(22) != 0) |
| + size = Neon8; |
| + int vd = instr->VFPNRegValue(kSimd128Precision); |
| + int rt = instr->RtValue(); |
| + uint32_t rt_value = get_register(rt); |
| + uint32_t q_data[4]; |
| + switch (size) { |
| + case Neon8: { |
| + rt_value &= 0xFF; |
| + uint8_t* dst = reinterpret_cast<uint8_t*>(q_data); |
| + for (int i = 0; i < 16; i++) { |
| + dst[i] = rt_value; |
| + } |
| + break; |
| + } |
| + case Neon16: { |
| + // Perform pairwise ops instead of casting to uint16_t. |
| + rt_value &= 0xFFFFu; |
| + uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu); |
| + for (int i = 0; i < 4; i++) { |
| + q_data[i] = rt_rt; |
| + } |
| + break; |
| + } |
| + case Neon32: { |
| + for (int i = 0; i < 4; i++) { |
| + q_data[i] = rt_value; |
| + } |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + set_q_register(vd, q_data); |
| + } |
| + } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { |
| // vmov (scalar to ARM core register) |
| - int vn = instr->Bits(19, 16) | (instr->Bit(7) << 4); |
| - double dn_value = get_double_from_d_register(vn); |
| - int32_t data[2]; |
| - memcpy(data, &dn_value, 8); |
| - set_register(instr->RtValue(), data[instr->Bit(21)]); |
| + int vn = instr->VFPNRegValue(kDoublePrecision); |
| + int rt = instr->RtValue(); |
| + int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); |
| + if ((opc1_opc2 & 0xb) == 0) { |
| + // NeonS32 / NeonU32 |
| + double dn_value = get_double_from_d_register(vn); |
| + int32_t data[2]; |
| + memcpy(data, &dn_value, 8); |
| + set_register(rt, data[instr->Bit(21)]); |
| + } else { |
| + uint64_t data; |
| + get_d_register(vn, &data); |
| + bool u = instr->Bit(23) != 0; |
| + if ((opc1_opc2 & 0x8) != 0) { |
| + // NeonS8 / NeonU8 |
| + int i = opc1_opc2 & 0x7; |
| + int shift = i * kBitsPerByte; |
| + uint32_t scalar = (data >> shift) & 0xFFu; |
| + if (!u && (scalar & 0x80) != 0) scalar |= 0xffffff00; |
| + set_register(rt, scalar); |
| + } else if ((opc1_opc2 & 0x1) != 0) { |
| + // NeonS16 / NeonU16 |
| + int i = (opc1_opc2 >> 1) & 0x3; |
| + int shift = i * kBitsPerByte * kShortSize; |
| + uint32_t scalar = (data >> shift) & 0xFFFFu; |
| + if (!u && (scalar & 0x8000) != 0) scalar |= 0xffff0000; |
| + set_register(rt, scalar); |
| + } else { |
| + UNREACHABLE(); // Not used by V8. |
| + } |
| + } |
| } else if ((instr->VLValue() == 0x1) && |
| (instr->VCValue() == 0x0) && |
| (instr->VAValue() == 0x7) && |
| @@ -3520,6 +3613,48 @@ int VFPConversionSaturate(double val, bool unsigned_res) { |
| } |
| } |
| +int32_t Simulator::ConvertDoubleToInt(double val, bool unsigned_integer, |
| + VFPRoundingMode mode, bool neon) { |
|
Rodolph Perfetta (ARM)
2016/12/14 20:27:07
nit: neon is unused.
bbudge
2016/12/14 23:50:52
Done.
|
| + int32_t result = |
| + unsigned_integer ? static_cast<uint32_t>(val) : static_cast<int32_t>(val); |
| + |
| + inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer); |
| + |
| + double abs_diff = unsigned_integer |
| + ? std::fabs(val - static_cast<uint32_t>(result)) |
| + : std::fabs(val - result); |
| + |
| + inexact_vfp_flag_ = (abs_diff != 0); |
| + |
| + if (inv_op_vfp_flag_) { |
| + result = VFPConversionSaturate(val, unsigned_integer); |
| + } else { |
| + switch (mode) { |
| + case RN: { |
| + int val_sign = (val > 0) ? 1 : -1; |
| + if (abs_diff > 0.5) { |
| + result += val_sign; |
| + } else if (abs_diff == 0.5) { |
| + // Round to even if exactly halfway. |
| + result = ((result % 2) == 0) ? result : result + val_sign; |
| + } |
| + break; |
| + } |
| + |
| + case RM: |
| + result = result > val ? result - 1 : result; |
| + break; |
| + |
| + case RZ: |
| + // Nothing to do. |
| + break; |
| + |
| + default: |
| + UNREACHABLE(); |
| + } |
| + } |
| + return result; |
| +} |
| void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) { |
| DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7) && |
| @@ -3556,44 +3691,7 @@ void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) { |
| double val = double_precision ? get_double_from_d_register(src) |
| : get_float_from_s_register(src); |
| - int temp = unsigned_integer ? static_cast<uint32_t>(val) |
| - : static_cast<int32_t>(val); |
| - |
| - inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer); |
| - |
| - double abs_diff = |
| - unsigned_integer ? std::fabs(val - static_cast<uint32_t>(temp)) |
| - : std::fabs(val - temp); |
| - |
| - inexact_vfp_flag_ = (abs_diff != 0); |
| - |
| - if (inv_op_vfp_flag_) { |
| - temp = VFPConversionSaturate(val, unsigned_integer); |
| - } else { |
| - switch (mode) { |
| - case RN: { |
| - int val_sign = (val > 0) ? 1 : -1; |
| - if (abs_diff > 0.5) { |
| - temp += val_sign; |
| - } else if (abs_diff == 0.5) { |
| - // Round to even if exactly halfway. |
| - temp = ((temp % 2) == 0) ? temp : temp + val_sign; |
| - } |
| - break; |
| - } |
| - |
| - case RM: |
| - temp = temp > val ? temp - 1 : temp; |
| - break; |
| - |
| - case RZ: |
| - // Nothing to do. |
| - break; |
| - |
| - default: |
| - UNREACHABLE(); |
| - } |
| - } |
| + int32_t temp = ConvertDoubleToInt(val, unsigned_integer, mode, false); |
| // Update the destination register. |
| set_s_register_from_sinteger(dst, temp); |
| @@ -3740,6 +3838,16 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { |
| } |
| } |
| +#define HIGH_16(x) ((x) >> 16) |
| +#define LOW_16(x) ((x)&0xFFFFu) |
| +#define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu) |
| +#define PAIRWISE_OP(x, y, OP) \ |
| + COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y)))) |
| + |
| +#define ADD_16(x, y) ((x) + (y)) |
| +#define SUB_16(x, y) ((x) - (y)) |
| +#define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0) |
| +#define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0) |
| void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| switch (instr->SpecialValue()) { |
| @@ -3752,6 +3860,91 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| uint32_t data[4]; |
| get_q_register(Vm, data); |
| set_q_register(Vd, data); |
| + } else if (instr->Bits(11, 8) == 8) { |
| + // vadd/vtst |
| + int size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + int Vn = instr->VFPNRegValue(kSimd128Precision); |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + if (instr->Bit(4) == 0) { |
| + // vadd.i<size> Qd, Qn, Qm. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
| + uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
| + for (int i = 0; i < 16; i++) { |
| + s1[i] += s2[i]; |
| + } |
| + break; |
| + } |
| + case Neon16: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16); |
| + } |
| + break; |
| + } |
| + case Neon32: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] += src2[i]; |
| + } |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + } else { |
| + // vtst.i<size> Qd, Qn, Qm. |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
| + uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
| + for (int i = 0; i < 16; i++) { |
| + s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0; |
| + } |
| + break; |
| + } |
| + case Neon16: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16); |
| + } |
| + break; |
| + } |
| + case Neon32: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
| + } |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + } |
| + set_q_register(Vd, src1); |
| + } else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xd && |
| + instr->Bit(4) == 0) { |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + int Vn = instr->VFPNRegValue(kSimd128Precision); |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + if (instr->Bit(21) == 0) { |
| + // vadd.f32 Qd, Qn, Qm. |
| + src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) + |
| + bit_cast<float>(src2[i])); |
| + } else { |
| + // vsub.f32 Qd, Qn, Qm. |
| + src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) - |
| + bit_cast<float>(src2[i])); |
| + } |
| + } |
| + set_q_register(Vd, src1); |
| } else { |
| UNIMPLEMENTED(); |
| } |
| @@ -3781,8 +3974,92 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| } |
| break; |
| case 6: |
| - if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && |
| - instr->Bit(4) == 1) { |
| + if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { |
| + // vsub.size Qd, Qn, Qm. |
| + int size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + int Vn = instr->VFPNRegValue(kSimd128Precision); |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
| + uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
| + for (int i = 0; i < 16; i++) { |
| + s1[i] -= s2[i]; |
| + } |
| + break; |
| + } |
| + case Neon16: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16); |
| + } |
| + break; |
| + } |
| + case Neon32: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] -= src2[i]; |
| + } |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + set_q_register(Vd, src1); |
| + } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) { |
| + // vceq.size Qd, Qn, Qm. |
| + int size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + int Vn = instr->VFPNRegValue(kSimd128Precision); |
| + uint32_t src1[4], src2[4]; |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + switch (size) { |
| + case Neon8: { |
| + uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
| + uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
| + for (int i = 0; i < 16; i++) { |
| + s1[i] = s1[i] == s2[i] ? 0xFF : 0; |
| + } |
| + break; |
| + } |
| + case Neon16: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16); |
| + } |
| + break; |
| + } |
| + case Neon32: { |
| + for (int i = 0; i < 4; i++) { |
| + src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0; |
| + } |
| + break; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + break; |
| + } |
| + set_q_register(Vd, src1); |
| + } else if (instr->Bits(21, 20) == 1 && instr->Bits(11, 8) == 1 && |
| + instr->Bit(4) == 1) { |
| + // vbsl.size Qd, Qn, Qm. |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + int Vn = instr->VFPNRegValue(kSimd128Precision); |
| + uint32_t dst[4], src1[4], src2[4]; |
| + get_q_register(Vd, dst); |
| + get_q_register(Vn, src1); |
| + get_q_register(Vm, src2); |
| + for (int i = 0; i < 4; i++) { |
| + dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
| + } |
| + set_q_register(Vd, dst); |
| + } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && |
| + instr->Bit(4) == 1) { |
| if (instr->Bit(6) == 0) { |
| // veor Dd, Dn, Dm |
| int Vd = instr->VFPDRegValue(kDoublePrecision); |
| @@ -3829,6 +4106,40 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| e++; |
| } |
| set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
| + } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB && |
| + instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 && |
| + instr->Bit(4) == 0) { |
| + // vcvt.<Td>.<Tm> Qd, Qm. |
| + int Vd = instr->VFPDRegValue(kSimd128Precision); |
| + int Vm = instr->VFPMRegValue(kSimd128Precision); |
| + uint32_t q_data[4]; |
| + get_q_register(Vm, q_data); |
| + int op = instr->Bits(8, 7); |
| + for (int i = 0; i < 4; i++) { |
| + switch (op) { |
| + case 0: |
| + // f32 <- s32, round towards nearest. |
| + q_data[i] = bit_cast<uint32_t>( |
| + std::round(static_cast<float>(bit_cast<int32_t>(q_data[i])))); |
| + break; |
| + case 1: |
| + // f32 <- u32, round towards nearest. |
| + q_data[i] = |
| + bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i]))); |
| + break; |
| + case 2: |
| + // s32 <- f32, round to zero. |
| + q_data[i] = static_cast<uint32_t>(ConvertDoubleToInt( |
| + bit_cast<float>(q_data[i]), false, RZ, true)); |
| + break; |
| + case 3: |
| + // u32 <- f32, round to zero. |
| + q_data[i] = static_cast<uint32_t>(ConvertDoubleToInt( |
| + bit_cast<float>(q_data[i]), true, RZ, true)); |
| + break; |
| + } |
| + } |
| + set_q_register(Vd, q_data); |
| } else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) && |
| (instr->Bit(4) == 0)) { |
| if (instr->Bit(6) == 0) { |
| @@ -3850,6 +4161,49 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
| set_q_register(vm, dval); |
| set_q_register(vd, mval); |
| } |
| + } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 && |
| + instr->Bit(4) == 0x0) { |
| + // vdup.32 Qd, Sm. |
| + int vd = instr->VFPDRegValue(kSimd128Precision); |
| + int vm = instr->VFPMRegValue(kDoublePrecision); |
| + int index = instr->Bit(19); |
| + uint32_t s_data = get_s_register(vm * 2 + index); |
| + uint32_t q_data[4]; |
| + for (int i = 0; i < 4; i++) q_data[i] = s_data; |
| + set_q_register(vd, q_data); |
| + } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 && |
| + instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) { |
| + // vmvn Qd, Qm. |
| + int vd = instr->VFPDRegValue(kSimd128Precision); |
| + int vm = instr->VFPMRegValue(kSimd128Precision); |
| + uint32_t q_data[4]; |
| + get_q_register(vm, q_data); |
| + for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
| + set_q_register(vd, q_data); |
| + } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 && |
| + instr->Bit(4) == 0x0) { |
| + // vtb[l,x] Dd, <list>, Dm. |
| + int vd = instr->VFPDRegValue(kDoublePrecision); |
| + int vn = instr->VFPNRegValue(kDoublePrecision); |
| + int vm = instr->VFPMRegValue(kDoublePrecision); |
| + int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
| + bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
| + uint64_t destination = 0, indices = 0, result = 0; |
| + get_d_register(vd, &destination); |
| + get_d_register(vm, &indices); |
| + for (int i = 0; i < kDoubleSize; i++) { |
| + int shift = i * kBitsPerByte; |
| + int index = (indices >> shift) & 0xFF; |
| + if (index < table_len) { |
| + uint64_t table; |
| + get_d_register(vn + index / kDoubleSize, &table); |
| + result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
| + << shift; |
| + } else if (vtbx) { |
| + result |= destination & (0xFFull << shift); |
| + } |
| + } |
| + set_d_register(vd, &result); |
| } else { |
| UNIMPLEMENTED(); |
| } |