Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(36)

Unified Diff: src/arm/macro-assembler-arm.cc

Issue 222403002: ARM: Avoid VMSR instruction when converting to clamped uint8 (Closed) Base URL: git://github.com/v8/v8.git@master
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | src/arm/simulator-arm.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm/macro-assembler-arm.cc
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc
index 076c0429a3e30a57265209b42a23623cf3d30159..882da13b9c93bea727d55e4e09fb5be0c2c0d746 100644
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -3793,36 +3793,65 @@ void MacroAssembler::ClampUint8(Register output_reg, Register input_reg) {
void MacroAssembler::ClampDoubleToUint8(Register result_reg,
DwVfpRegister input_reg,
LowDwVfpRegister double_scratch) {
- Label above_zero;
Label done;
- Label in_bounds;
- VFPCompareAndSetFlags(input_reg, 0.0);
- b(gt, &above_zero);
-
- // Double value is less than zero, NaN or Inf, return 0.
mov(result_reg, Operand::Zero());
- b(al, &done);
+ VFPCompareAndSetFlags(input_reg, 0.0);
+ b(le, &done); // Double value is <= 0 (including -Inf) or NaN, return 0.
jbramley 2014/04/03 09:22:48 Actually there are only two input ranges where vcv
- // Double value is >= 255, return 255.
- bind(&above_zero);
Vmov(double_scratch, 255.0, result_reg);
- VFPCompareAndSetFlags(input_reg, double_scratch);
- b(le, &in_bounds);
mov(result_reg, Operand(255));
- b(al, &done);
+ VFPCompareAndSetFlags(input_reg, double_scratch);
+ b(ge, &done); // Double value is >= 255, return 255.
// In 0-255 range, round and truncate.
- bind(&in_bounds);
- // Save FPSCR.
- vmrs(ip);
- // Set rounding mode to round to the nearest integer by clearing bits[23:22].
jbramley 2014/04/03 09:22:48 Shouldn't we be in the right rounding mode already
oetuaho-nv 2014/04/03 16:02:46 This would be an excellent solution, but it seems
- bic(result_reg, ip, Operand(kVFPRoundingModeMask));
- vmsr(result_reg);
- vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding);
- vmov(result_reg, double_scratch.low());
- // Restore FPSCR.
- vmsr(ip);
+ if (CpuFeatures::IsSupported(VFP3)) {
+ // Setting rounding mode is expensive on some ARM CPUs, hence this more
+ // convoluted solution of converting to fixed point, adding 0.5, and
+ // handling ties explicitly.
+ Label truncate;
+
+ // If any of the low 32 bits in the mantissa is one, the fraction part
+ // can't be exactly 0.5.
+ VmovLow(result_reg, input_reg);
+ tst(result_reg, Operand(0xFFFFFFFF));
+
+ // Convert to fixed point in format 0xII.DDDDDD
+ vmov(double_scratch, input_reg);
+ vcvt_u32_f64(double_scratch, 24);
+ VmovLow(result_reg, double_scratch);
+
+ // Add 0.5 in fixed point, guaranteed not to overflow since result_reg is
+ // at most 0xFF000000 at this point.
+ add(result_reg, result_reg, Operand(0x00800000));
+
+ // Proceed to truncate if we determined earlier that there can't be a tie.
+ b(ne, &truncate);
+
+ // We have already tested the 32 low bits, now test the 24 highest fraction
+ // bits. In case the number is at least 0.5, these two tests together are
+ // guaranteed to cover all the bits in the 52-bit mantissa. In case the
+ // number is smaller than 0.5, the bic is a no-op.
+ tst(result_reg, Operand(0x00FFFFFF));
+ // If all the fraction bits are 0 after adding 0.5, we had a tie, and the
+ // result should be rounded to even (clear the lowest integer bit).
+ bic(result_reg, result_reg, Operand(0x01000000), LeaveCC, eq);
+
+ bind(&truncate);
+ // Shift out all the fraction bits.
+ mov(result_reg, Operand(result_reg, LSR, 24));
+ } else {
+ // Save FPSCR.
+ vmrs(ip);
+ // Set rounding mode to round to the nearest integer by clearing
+ // bits[23:22].
+ bic(result_reg, ip, Operand(kVFPRoundingModeMask));
+ vmsr(result_reg);
+ vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding);
+ vmov(result_reg, double_scratch.low());
+ // Restore FPSCR.
+ vmsr(ip);
+ }
+
bind(&done);
}
« no previous file with comments | « src/arm/disasm-arm.cc ('k') | src/arm/simulator-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698