Index: src/arm/macro-assembler-arm.cc |
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc |
index 076c0429a3e30a57265209b42a23623cf3d30159..882da13b9c93bea727d55e4e09fb5be0c2c0d746 100644 |
--- a/src/arm/macro-assembler-arm.cc |
+++ b/src/arm/macro-assembler-arm.cc |
@@ -3793,36 +3793,65 @@ void MacroAssembler::ClampUint8(Register output_reg, Register input_reg) { |
void MacroAssembler::ClampDoubleToUint8(Register result_reg, |
DwVfpRegister input_reg, |
LowDwVfpRegister double_scratch) { |
- Label above_zero; |
Label done; |
- Label in_bounds; |
- VFPCompareAndSetFlags(input_reg, 0.0); |
- b(gt, &above_zero); |
- |
- // Double value is less than zero, NaN or Inf, return 0. |
mov(result_reg, Operand::Zero()); |
- b(al, &done); |
+ VFPCompareAndSetFlags(input_reg, 0.0); |
+ b(le, &done); // Double value is <= 0 (including -Inf) or NaN, return 0. |
jbramley
2014/04/03 09:22:48
Actually there are only two input ranges where vcv
|
- // Double value is >= 255, return 255. |
- bind(&above_zero); |
Vmov(double_scratch, 255.0, result_reg); |
- VFPCompareAndSetFlags(input_reg, double_scratch); |
- b(le, &in_bounds); |
mov(result_reg, Operand(255)); |
- b(al, &done); |
+ VFPCompareAndSetFlags(input_reg, double_scratch); |
+ b(ge, &done); // Double value is >= 255, return 255. |
// In 0-255 range, round and truncate. |
- bind(&in_bounds); |
- // Save FPSCR. |
- vmrs(ip); |
- // Set rounding mode to round to the nearest integer by clearing bits[23:22]. |
jbramley
2014/04/03 09:22:48
Shouldn't we be in the right rounding mode already
oetuaho-nv
2014/04/03 16:02:46
This would be an excellent solution, but it seems
|
- bic(result_reg, ip, Operand(kVFPRoundingModeMask)); |
- vmsr(result_reg); |
- vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding); |
- vmov(result_reg, double_scratch.low()); |
- // Restore FPSCR. |
- vmsr(ip); |
+ if (CpuFeatures::IsSupported(VFP3)) { |
+ // Setting rounding mode is expensive on some ARM CPUs, hence this more |
+ // convoluted solution of converting to fixed point, adding 0.5, and |
+ // handling ties explicitly. |
+ Label truncate; |
+ |
+ // If any of the low 32 bits in the mantissa is one, the fraction part |
+ // can't be exactly 0.5. |
+ VmovLow(result_reg, input_reg); |
+ tst(result_reg, Operand(0xFFFFFFFF)); |
+ |
+ // Convert to fixed point in format 0xII.DDDDDD |
+ vmov(double_scratch, input_reg); |
+ vcvt_u32_f64(double_scratch, 24); |
+ VmovLow(result_reg, double_scratch); |
+ |
+ // Add 0.5 in fixed point, guaranteed not to overflow since result_reg is |
+ // at most 0xFF000000 at this point. |
+ add(result_reg, result_reg, Operand(0x00800000)); |
+ |
+ // Proceed to truncate if we determined earlier that there can't be a tie. |
+ b(ne, &truncate); |
+ |
+ // We have already tested the 32 low bits, now test the 24 highest fraction |
+ // bits. In case the number is at least 0.5, these two tests together are |
+ // guaranteed to cover all the bits in the 52-bit mantissa. In case the |
+ // number is smaller than 0.5, the bic is a no-op. |
+ tst(result_reg, Operand(0x00FFFFFF)); |
+ // Fraction bits all 0 after adding 0.5: exact tie, round to even. |
+ bic(result_reg, result_reg, Operand(0x01000000), LeaveCC, eq); |
+ |
+ bind(&truncate); |
+ // Shift out all the fraction bits. |
+ mov(result_reg, Operand(result_reg, LSR, 24)); |
+ } else { |
+ // Save FPSCR. |
+ vmrs(ip); |
+ // Set rounding mode to round to the nearest integer by clearing |
+ // bits[23:22]. |
+ bic(result_reg, ip, Operand(kVFPRoundingModeMask)); |
+ vmsr(result_reg); |
+ vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding); |
+ vmov(result_reg, double_scratch.low()); |
+ // Restore FPSCR. |
+ vmsr(ip); |
+ } |
+ |
bind(&done); |
} |