src/arm/macro-assembler-arm.cc - Issue 222403002: ARM: Avoid VMSR instruction when converting to clamped uint8

Side by Side Diff: src/arm/macro-assembler-arm.cc

Issue 222403002: ARM: Avoid VMSR instruction when converting to clamped uint8 (Closed) Base URL: git://github.com/v8/v8.git@master

Patch Set: Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 3775 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3786	3786

3787	3787

3788 void MacroAssembler::ClampUint8(Register output_reg, Register input_reg) {	3788 void MacroAssembler::ClampUint8(Register output_reg, Register input_reg) {

3789 Usat(output_reg, 8, Operand(input_reg));	3789 Usat(output_reg, 8, Operand(input_reg));

3790 }	3790 }

3791	3791

3792	3792

3793 void MacroAssembler::ClampDoubleToUint8(Register result_reg,	3793 void MacroAssembler::ClampDoubleToUint8(Register result_reg,

3794 DwVfpRegister input_reg,	3794 DwVfpRegister input_reg,

3795 LowDwVfpRegister double_scratch) {	3795 LowDwVfpRegister double_scratch) {

3796 Label above_zero;

3797 Label done;	3796 Label done;

3798 Label in_bounds;

3799	3797

	3798 mov(result_reg, Operand::Zero());

3800 VFPCompareAndSetFlags(input_reg, 0.0);	3799 VFPCompareAndSetFlags(input_reg, 0.0);

3801 b(gt, &above_zero);	3800 b(le, &done); // Double value is <= 0, NaN or Inf, return 0.
	jbramley 2014/04/03 09:22:48 Actually there are only two input ranges where vcvt.u32 doesn't agree with this clamp operation: - Values greater than 255 (since vcvt only converts to (u)int32 values). - +infinity, where vcvt produces 0 but we want 255. We can catch both of these ranges with the >=255 test, so the <=0 test can be omitted entirely. (The old implementation used vcvt.s32 so the zero check was necessary there.)
3802	3801

3803 // Double value is less than zero, NaN or Inf, return 0.

3804 mov(result_reg, Operand::Zero());

3805 b(al, &done);

3806

3807 // Double value is >= 255, return 255.

3808 bind(&above_zero);

3809 Vmov(double_scratch, 255.0, result_reg);	3802 Vmov(double_scratch, 255.0, result_reg);

	3803 mov(result_reg, Operand(255));

3810 VFPCompareAndSetFlags(input_reg, double_scratch);	3804 VFPCompareAndSetFlags(input_reg, double_scratch);

3811 b(le, &in_bounds);	3805 b(ge, &done); // Double value is >= 255, return 255.

3812 mov(result_reg, Operand(255));

3813 b(al, &done);

3814	3806

3815 // In 0-255 range, round and truncate.	3807 // In 0-255 range, round and truncate.

3816 bind(&in_bounds);	3808 if (CpuFeatures::IsSupported(VFP3)) {

3817 // Save FPSCR.	3809 // Setting rounding mode is expensive on some ARM CPUs, hence this more

3818 vmrs(ip);	3810 // convoluted solution of converting to fixed point, adding 0.5, and

3819 // Set rounding mode to round to the nearest integer by clearing bits[23:22].	3811 // handling ties explicitly.
jbramley 2014/04/03 09:22:48
Shouldn't we be in the right rounding mode already? ECMAScript maths operations are supposed to be done in round-to-nearest mode, and this is also the default FPSCR setting. If we've changed it explicitly somewhere else (for whatever reason), we're probably not doing normal ECMAScript maths properly.

If I'm correct about that, the whole thing collapses down:

  // Handle inputs >= 255 (including +infinity).
  mov(result_reg, 255);
  Vmov(double_scratch, 255.0, result_reg);
  VFPCompareAndSetFlags(input_reg, double_scratch);
  b(ge, &done);

  // All other inputs will clamp to the range [0-255]: NaN and -infinity both produce 0.
  vcvt_u32_f64(double_scratch.low(), input_reg, kFPSCRRounding);
  Vmov(result_reg, double_scratch.low());

This is more-or-less equivalent to what we did in ClampDoubleToUint8 in src/arm64/macro-assembler-arm64.cc, though the available instructions make it much simpler in A64.

oetuaho-nv 2014/04/03 16:02:46
On 2014/04/03 09:22:48, jbramley wrote:
> Shouldn't we be in the right rounding mode already? ECMAScript maths operations
> are supposed to be done in round-to-nearest mode, and this is also the default
> FPSCR setting. If we've changed it explicitly somewhere else (for whatever
> reason), we're probably not doing normal ECMAScript maths properly.
>
> If I'm correct about that, the whole thing collapses down:
>
> // Handle inputs >= 255 (including +infinity).
> mov(result_reg, 255);
> Vmov(double_scratch, 255.0, result_reg);
> VFPCompareAndSetFlags(input_reg, double_scratch);
> b(ge, &done);
>
> // All other inputs will clamp to the range [0-255]: NaN and -infinity both
> produce 0.
> vcvt_u32_f64(double_scratch.low(), input_reg, kFPSCRRounding);
> Vmov(result_reg, double_scratch.low());
>
> This is more-or-less equivalent to what we did in ClampDoubleToUint8 in
> src/arm64/macro-assembler-arm64.cc, though the available instructions make it
> much simpler in A64.

This would be an excellent solution, but it seems the FPSCR state can be wrong when entering this function. VFPEnsureFPSCRState doesn't currently set the rounding mode, and I suppose it can be messed with by outside code. But if setting the FPSCR state in VFPEnsureFPSCRState will be enough, this solution could work.
3820 bic(result_reg, ip, Operand(kVFPRoundingModeMask));	3812 Label truncate;

3821 vmsr(result_reg);	3813

3822 vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding);	3814 // If any of the low 32 bits in the mantissa is one, the fraction part

3823 vmov(result_reg, double_scratch.low());	3815 // can't be exactly 0.5.

3824 // Restore FPSCR.	3816 VmovLow(result_reg, input_reg);

3825 vmsr(ip);	3817 tst(result_reg, Operand(0xFFFFFFFF));

	3818

	3819 // Convert to fixed point in format 0xII.DDDDDD

	3820 vmov(double_scratch, input_reg);

	3821 vcvt_u32_f64(double_scratch, 24);

	3822 VmovLow(result_reg, double_scratch);

	3823

	3824 // Add 0.5 in fixed point, guaranteed not to overflow since result_reg is

	3825 // at most 0xFF000000 at this point.

	3826 add(result_reg, result_reg, Operand(0x00800000));

	3827

	3828 // Proceed to truncate if we determined earlier that there can't be a tie.

	3829 b(ne, &truncate);

	3830

	3831 // We have already tested the 32 low bits, now test the 24 highest fraction

	3832 // bits. In case the number is at least 0.5, these two tests together are

	3833 // guaranteed to cover all the bits in the 52-bit mantissa. In case the

	3834 // number is smaller than 0.5, the bic is a no-op.

	3835 tst(result_reg, Operand(0x00FFFFFF));

	3836 // If all the decimals were 0, we had a tie, and the result should be even.

	3837 bic(result_reg, result_reg, Operand(0x01000000), LeaveCC, eq);

	3838

	3839 bind(&truncate);

	3840 // Shift out all the fraction bits.

	3841 mov(result_reg, Operand(result_reg, LSR, 24));

	3842 } else {

	3843 // Save FPSCR.

	3844 vmrs(ip);

	3845 // Set rounding mode to round to the nearest integer by clearing

	3846 // bits[23:22].

	3847 bic(result_reg, ip, Operand(kVFPRoundingModeMask));

	3848 vmsr(result_reg);

	3849 vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding);

	3850 vmov(result_reg, double_scratch.low());

	3851 // Restore FPSCR.

	3852 vmsr(ip);

	3853 }

	3854

3826 bind(&done);	3855 bind(&done);

3827 }	3856 }

3828	3857

3829	3858

3830 void MacroAssembler::Throw(BailoutReason reason) {	3859 void MacroAssembler::Throw(BailoutReason reason) {

3831 Label throw_start;	3860 Label throw_start;

3832 bind(&throw_start);	3861 bind(&throw_start);

3833 #ifdef DEBUG	3862 #ifdef DEBUG

3834 const char* msg = GetBailoutReason(reason);	3863 const char* msg = GetBailoutReason(reason);

3835 if (msg != NULL) {	3864 if (msg != NULL) {

(...skipping 255 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4091 sub(result, result, Operand(dividend));	4120 sub(result, result, Operand(dividend));

4092 }	4121 }

4093 if (ms.shift() > 0) mov(result, Operand(result, ASR, ms.shift()));	4122 if (ms.shift() > 0) mov(result, Operand(result, ASR, ms.shift()));

4094 add(result, result, Operand(dividend, LSR, 31));	4123 add(result, result, Operand(dividend, LSR, 31));

4095 }	4124 }

4096	4125

4097	4126

4098 } } // namespace v8::internal	4127 } } // namespace v8::internal

4099	4128

4100 #endif // V8_TARGET_ARCH_ARM	4129 #endif // V8_TARGET_ARCH_ARM

OLD	NEW

« no previous file with comments | « src/arm/disasm-arm.cc ('k') | src/arm/simulator-arm.h » ('j') | no next file with comments »