src/arm/macro-assembler-arm.cc - Issue 222403002: ARM: Avoid VMSR instruction when converting to clamped uint8

Unified Diff: src/arm/macro-assembler-arm.cc

Issue 222403002: ARM: Avoid VMSR instruction when converting to clamped uint8 (Closed) Base URL: git://github.com/v8/v8.git@master

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/arm/macro-assembler-arm.cc

diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc

index 076c0429a3e30a57265209b42a23623cf3d30159..882da13b9c93bea727d55e4e09fb5be0c2c0d746 100644

--- a/src/arm/macro-assembler-arm.cc

+++ b/src/arm/macro-assembler-arm.cc

@@ -3793,36 +3793,65 @@ void MacroAssembler::ClampUint8(Register output_reg, Register input_reg) {

void MacroAssembler::ClampDoubleToUint8(Register result_reg,

DwVfpRegister input_reg,

LowDwVfpRegister double_scratch) {

- Label above_zero;

Label done;

- Label in_bounds;

- VFPCompareAndSetFlags(input_reg, 0.0);

- b(gt, &above_zero);

- // Double value is less than zero, NaN or Inf, return 0.

mov(result_reg, Operand::Zero());

- b(al, &done);

+ VFPCompareAndSetFlags(input_reg, 0.0);

+ b(le, &done); // Double value is <= 0 (including -Inf) or NaN, return 0.

jbramley 2014/04/03 09:22:48 Actually there are only two input ranges where vcv

- // Double value is >= 255, return 255.

- bind(&above_zero);

Vmov(double_scratch, 255.0, result_reg);

- VFPCompareAndSetFlags(input_reg, double_scratch);

- b(le, &in_bounds);

mov(result_reg, Operand(255));

- b(al, &done);

+ VFPCompareAndSetFlags(input_reg, double_scratch);

+ b(ge, &done); // Double value is >= 255, return 255.

// In 0-255 range, round and truncate.

- bind(&in_bounds);

- // Save FPSCR.

- vmrs(ip);

- // Set rounding mode to round to the nearest integer by clearing bits[23:22].

jbramley 2014/04/03 09:22:48 Shouldn't we be in the right rounding mode already

oetuaho-nv 2014/04/03 16:02:46 This would be an excellent solution, but it seems

- bic(result_reg, ip, Operand(kVFPRoundingModeMask));

- vmsr(result_reg);

- vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding);

- vmov(result_reg, double_scratch.low());

- // Restore FPSCR.

- vmsr(ip);

+ if (CpuFeatures::IsSupported(VFP3)) {

+ // Setting rounding mode is expensive on some ARM CPUs, hence this more

+ // convoluted solution of converting to fixed point, adding 0.5, and

+ // handling ties explicitly.

+ Label truncate;

+ // If any of the low 32 bits in the mantissa is one, the fraction part

+ // can't be exactly 0.5.

+ VmovLow(result_reg, input_reg);

+ tst(result_reg, Operand(0xFFFFFFFF));

+ // Convert to fixed point in format 0xII.DDDDDD

+ vmov(double_scratch, input_reg);

+ vcvt_u32_f64(double_scratch, 24);

+ VmovLow(result_reg, double_scratch);

+ // Add 0.5 in fixed point, guaranteed not to overflow since result_reg is

+ // at most 0xFF000000 at this point.

+ add(result_reg, result_reg, Operand(0x00800000));

+ // Proceed to truncate if we determined earlier that there can't be a tie.

+ b(ne, &truncate);

+ // We have already tested the 32 low bits, now test the 24 highest fraction

+ // bits. In case the number is at least 0.5, these two tests together are

+ // guaranteed to cover all the bits in the 52-bit mantissa. In case the

+ // number is smaller than 0.5, the bic is a no-op.

+ tst(result_reg, Operand(0x00FFFFFF));

+ // If all the decimals were 0, we had a tie, and the result should be even.

+ bic(result_reg, result_reg, Operand(0x01000000), LeaveCC, eq);

+ bind(&truncate);

+ // Shift out all the fraction bits.

+ mov(result_reg, Operand(result_reg, LSR, 24));

+ } else {

+ // Save FPSCR.

+ vmrs(ip);

+ // Set rounding mode to round to the nearest integer by clearing

+ // bits[23:22].

+ bic(result_reg, ip, Operand(kVFPRoundingModeMask));

+ vmsr(result_reg);

+ vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding);

+ vmov(result_reg, double_scratch.low());

+ // Restore FPSCR.

+ vmsr(ip);

+ }

bind(&done);

}

« no previous file with comments | « src/arm/disasm-arm.cc ('k') | src/arm/simulator-arm.h » ('j') | no next file with comments »