src/ia32/lithium-codegen-ia32.cc - Issue 6049008: SSE2 truncating double-to-i.

Unified Diff: src/ia32/lithium-codegen-ia32.cc

Issue 6049008: SSE2 truncating double-to-i. (Closed)

Patch Set: . Created 9 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/ia32/lithium-codegen-ia32.cc

diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc

index 9569ac8b8099f628e410c09e0e00e2741e5feaa3..449befdd9df7c00d67973d08087ca2756aabb312 100644

--- a/src/ia32/lithium-codegen-ia32.cc

+++ b/src/ia32/lithium-codegen-ia32.cc

@@ -2885,9 +2885,58 @@ void LCodeGen::DoDoubleToI(LDoubleToI* instr) {

__ add(Operand(esp), Immediate(kDoubleSize));

__ bind(&done);

} else {

- // This will bail out if the input was not in the int32 range (or,

- // unfortunately, if the input was 0x80000000).

- DeoptimizeIf(equal, instr->environment());

+ NearLabel done;

+ Register temp_reg = ToRegister(instr->temporary());

+ XMMRegister xmm_scratch = xmm0;

+ __ j(not_equal, &done);

+ // Get high 32 bits of the input in temp_reg.

+ __ pshufd(xmm_scratch, input_reg, 1);

+ __ movd(Operand(temp_reg), xmm_scratch);

+ // Zero out the sign and the exponent in the input (by shifting

+ // it to the left) and restore the implicit mantissa bit,

+ // i.e. convert the input to unsigned int64 shifted left by

+ // kExponentBits.

+ __ mov(result_reg, 1);

+ __ movd(xmm_scratch, Operand(result_reg));

+ __ psllq(xmm_scratch, kDoubleSize * kBitsPerByte - 1);

Erik Corry 2011/01/04 22:33:00 It might be faster to load xmm_scratch using the t… [comment truncated in this view]

+ __ psllq(input_reg, HeapNumber::kExponentBits);

+ __ por(input_reg, xmm_scratch);

+ // Restore high 32 bits of the input in result_reg.

+ __ mov(result_reg, temp_reg);

+ // Prepare negation mask in temp_reg.

+ __ sar(temp_reg, kBitsPerInt - 1);

+ // Extract the exponent from result_reg and subtract adjusted

+ // bias from it. The adjustment is selected in a way such that

+ // when the difference is zero, the answer is in the low 32 bits

+ // of the input, otherwise a shift has to be performed.

+ __ shr(result_reg, HeapNumber::kExponentShift);

+ __ and_(result_reg,

+ HeapNumber::kExponentMask >> HeapNumber::kExponentShift);

+ __ sub(Operand(result_reg),

+ Immediate(HeapNumber::kExponentBias +

+ HeapNumber::kExponentBits +

+ HeapNumber::kMantissaBits));

+ // Don't handle big (or special) exponents.

Erik Corry 2011/01/04 22:33:00 Could you add a comment describing the range handl… [comment truncated in this view]

+ DeoptimizeIf(greater_equal, instr->environment());

+ // Get the amount to shift the input right in xmm_scratch.

+ __ neg(result_reg);

+ __ movd(xmm_scratch, Operand(result_reg));

+ // Shift the input right and extract low 32 bits.

+ __ psrlq(input_reg, xmm_scratch);

+ __ movd(Operand(result_reg), input_reg);

+ // Use the prepared mask in temp_reg to negate the result if necessary.

+ __ xor_(result_reg, Operand(temp_reg));

+ __ sub(result_reg, Operand(temp_reg));

Erik Corry 2011/01/04 22:33:00 ooh, nice!

+ __ bind(&done);

}

} else {

NearLabel done;

« src/ia32/assembler-ia32.h ('K') | « src/ia32/disasm-ia32.cc ('k') | src/ia32/lithium-ia32.h » ('j') | no next file with comments »