Chromium Code Reviews| Index: src/arm/macro-assembler-arm.cc |
| diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc |
| index 076c0429a3e30a57265209b42a23623cf3d30159..882da13b9c93bea727d55e4e09fb5be0c2c0d746 100644 |
| --- a/src/arm/macro-assembler-arm.cc |
| +++ b/src/arm/macro-assembler-arm.cc |
| @@ -3793,36 +3793,65 @@ void MacroAssembler::ClampUint8(Register output_reg, Register input_reg) { |
| void MacroAssembler::ClampDoubleToUint8(Register result_reg, |
| DwVfpRegister input_reg, |
| LowDwVfpRegister double_scratch) { |
| - Label above_zero; |
| Label done; |
| - Label in_bounds; |
| - VFPCompareAndSetFlags(input_reg, 0.0); |
| - b(gt, &above_zero); |
| - |
| - // Double value is less than zero, NaN or Inf, return 0. |
| mov(result_reg, Operand::Zero()); |
| - b(al, &done); |
| + VFPCompareAndSetFlags(input_reg, 0.0); |
| + b(le, &done); // Double value is <= 0 (including -Inf) or NaN, return 0. |
|
jbramley
2014/04/03 09:22:48
Actually there are only two input ranges where vcv
|
| - // Double value is >= 255, return 255. |
| - bind(&above_zero); |
| Vmov(double_scratch, 255.0, result_reg); |
| - VFPCompareAndSetFlags(input_reg, double_scratch); |
| - b(le, &in_bounds); |
| mov(result_reg, Operand(255)); |
| - b(al, &done); |
| + VFPCompareAndSetFlags(input_reg, double_scratch); |
| + b(ge, &done); // Double value is >= 255, return 255. |
| // In 0-255 range, round and truncate. |
| - bind(&in_bounds); |
| - // Save FPSCR. |
| - vmrs(ip); |
| - // Set rounding mode to round to the nearest integer by clearing bits[23:22]. |
|
jbramley
2014/04/03 09:22:48
Shouldn't we be in the right rounding mode already
oetuaho-nv
2014/04/03 16:02:46
This would be an excellent solution, but it seems
|
| - bic(result_reg, ip, Operand(kVFPRoundingModeMask)); |
| - vmsr(result_reg); |
| - vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding); |
| - vmov(result_reg, double_scratch.low()); |
| - // Restore FPSCR. |
| - vmsr(ip); |
| + if (CpuFeatures::IsSupported(VFP3)) { |
| + // Setting rounding mode is expensive on some ARM CPUs, hence this more |
| + // convoluted solution of converting to fixed point, adding 0.5, and |
| + // handling ties explicitly. |
| + Label truncate; |
| + |
| + // If any of the low 32 bits in the mantissa is one, the fraction part |
| + // can't be exactly 0.5. |
| + VmovLow(result_reg, input_reg); |
| + tst(result_reg, Operand(0xFFFFFFFF)); |
| + |
| + // Convert to fixed point in format 0xII.DDDDDD |
| + vmov(double_scratch, input_reg); |
| + vcvt_u32_f64(double_scratch, 24); |
| + VmovLow(result_reg, double_scratch); |
| + |
| + // Add 0.5 in fixed point, guaranteed not to overflow since result_reg is |
| + // at most 0xFF000000 at this point. |
| + add(result_reg, result_reg, Operand(0x00800000)); |
| + |
| + // Proceed to truncate if we determined earlier that there can't be a tie. |
| + b(ne, &truncate); |
| + |
| + // We have already tested the 32 low bits, now test the 24 highest fraction |
| + // bits. In case the number is at least 0.5, these two tests together are |
| + // guaranteed to cover all the bits in the 52-bit mantissa. In case the |
| + // number is smaller than 0.5, the bic is a no-op. |
| + tst(result_reg, Operand(0x00FFFFFF)); |
| + // If all the fraction bits are 0, we had a tie, and the result should be even. |
| + bic(result_reg, result_reg, Operand(0x01000000), LeaveCC, eq); |
| + |
| + bind(&truncate); |
| + // Shift out all the fraction bits. |
| + mov(result_reg, Operand(result_reg, LSR, 24)); |
| + } else { |
| + // Save FPSCR. |
| + vmrs(ip); |
| + // Set rounding mode to round to the nearest integer by clearing |
| + // bits[23:22]. |
| + bic(result_reg, ip, Operand(kVFPRoundingModeMask)); |
| + vmsr(result_reg); |
| + vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding); |
| + vmov(result_reg, double_scratch.low()); |
| + // Restore FPSCR. |
| + vmsr(ip); |
| + } |
| + |
| bind(&done); |
| } |