src/arm/lithium-codegen-arm.cc - Issue 9638018: [v8-dev] Optimise Math.floor(x/y) to use integer division for specific divisor....

Side by Side Diff: src/arm/lithium-codegen-arm.cc

Issue 9638018: [v8-dev] Optimise Math.floor(x/y) to use integer division for specific divisor.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 1017 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1028 DeoptimizeIf(mi, instr->environment());	1028 DeoptimizeIf(mi, instr->environment());

1029 __ bind(&ok);	1029 __ bind(&ok);

1030 // Load the result and we are done.	1030 // Load the result and we are done.

1031 __ mov(result, scratch2);	1031 __ mov(result, scratch2);

1032 }	1032 }

1033	1033

1034 __ bind(&done);	1034 __ bind(&done);

1035 }	1035 }

1036	1036

1037	1037

	1038 void LCodeGen::EmitSignedIntegerDivisionByConstant(

	1039 Register result,

	1040 Register dividend,

	1041 int32_t divisor,

	1042 Register remainder,

	1043 Register scratch,

	1044 LEnvironment* environment) {

	1045 ASSERT(!AreAliased(dividend, scratch, ip));

	1046 ASSERT(LChunkBuilder::HasMagicNumberForDivisor(divisor));

	1047 bool compute_remainder = remainder.is_valid();
	fschneider 2012/03/28 09:59:18 Isn't remainder always a temp-register and compute Isn't remainder always a temp-register and compute_remainder always true in here? In this case you could simplify the code a bit. Alexandre 2012/03/28 16:27:38 Done. This mechanism can be reintroduced if furt Done. This mechanism can be reintroduced if further optimizations do not require to compute the remainder. On 2012/03/28 09:59:18, fschneider wrote: Show quoted text > Isn't remainder always a temp-register and compute_remainder always true in > here? In this case you could simplify the code a bit.
	1048

	1049 uint32_t divisor_abs = abs(divisor);

	1050

	1051 int32_t power_of_2_factor =

	1052 CompilerIntrinsics::CountTrailingZeros(divisor_abs);

	1053

	1054 switch (divisor_abs) {

	1055 case 0:

	1056 DeoptimizeIf(al, environment);

	1057 return;

	1058

	1059 case 1:

	1060 if (divisor > 0) {

	1061 __ Move(result, dividend);

	1062 } else {

	1063 __ rsb(result, dividend, Operand(0));

	1064 }

	1065 if (compute_remainder) {

	1066 __ mov(remainder, Operand(0));

	1067 }

	1068 return;

	1069

	1070 case 2:

	1071 // Correct the result for negative dividends.

	1072 __ add(scratch, dividend, Operand(dividend, LSR, 31));

	1073 __ mov(result, Operand(scratch, ASR, 1));

	1074 if (divisor < 0) {

	1075 __ rsb(result, result, Operand(0));

	1076 }

	1077 if (compute_remainder) {

	1078 if (divisor > 0) {

	1079 __ sub(remainder, dividend, Operand(result, LSL, 1));

	1080 } else {

	1081 __ add(remainder, dividend, Operand(result, LSL, 1));

	1082 }

	1083 }

	1084 return;

	1085

	1086 default:

	1087 if (IsPowerOf2(divisor_abs)) {

	1088 // Branch and condition free code for integer division by a power

	1089 // of two.

	1090 int32_t power = WhichPowerOf2(divisor_abs);

	1091 __ mov(scratch, Operand(dividend, ASR, power - 1));

	1092 __ add(scratch, dividend, Operand(scratch, LSR, 32 - power));

	1093 __ mov(result, Operand(scratch, ASR, power));

	1094 // Negate if necessary.

	1095 // We don't need to check for overflow because the case '-1' is

	1096 // handled separately.

	1097 if (divisor < 0) {

	1098 ASSERT(divisor != -1);

	1099 __ rsb(result, result, Operand(0));

	1100 }

	1101 if (compute_remainder) {

	1102 if (divisor > 0) {

	1103 __ sub(remainder, dividend, Operand(result, LSL, power));

	1104 } else {

	1105 __ add(remainder, dividend, Operand(result, LSL, power));

	1106 }

	1107 }

	1108 return;

	1109 } else {

	1110 // Use magic numbers for a few specific divisors.

	1111 // Details and proofs can be found in:

	1112 // - Hacker's Delight, Henry S. Warren, Jr.

	1113 // - The PowerPC Compiler Writer’s Guide

	1114 // and probably many others.

	1115 //

	1116 // We handle

	1117 // <divisor with magic numbers> * <power of 2>

	1118 // but not

	1119 // <divisor with magic numbers> * <other divisor with magic numbers>

	1120 DivMagicNumbers magic_numbers =

	1121 DivMagicNumberFor(divisor_abs >> power_of_2_factor);

	1122 // Branch and condition free code for integer division by a power

	1123 // of two.

	1124 const int32_t M = magic_numbers.M;

	1125 const int32_t s = magic_numbers.s + power_of_2_factor;

	1126

	1127 __ mov(ip, Operand(M));

	1128 __ smull(ip, scratch, dividend, ip);

	1129 if (M < 0) {

	1130 __ add(scratch, scratch, Operand(dividend));

	1131 }

	1132 if (s > 0) {

	1133 __ mov(scratch, Operand(scratch, ASR, s));

	1134 }

	1135 __ add(result, scratch, Operand(dividend, LSR, 31));

	1136 if (divisor < 0) __ rsb(result, result, Operand(0));

	1137 if (compute_remainder) {

	1138 __ mov(ip, Operand(divisor));

	1139 // This sequence could be replaced with 'mls' when

	1140 // it gets implemented.

	1141 __ mul(scratch, result, ip);

	1142 __ sub(remainder, dividend, scratch);

	1143 }

	1144 }

	1145 }

	1146 }

	1147

	1148

1038 void LCodeGen::DoDivI(LDivI* instr) {	1149 void LCodeGen::DoDivI(LDivI* instr) {

1039 class DeferredDivI: public LDeferredCode {	1150 class DeferredDivI: public LDeferredCode {

1040 public:	1151 public:

1041 DeferredDivI(LCodeGen* codegen, LDivI* instr)	1152 DeferredDivI(LCodeGen* codegen, LDivI* instr)

1042 : LDeferredCode(codegen), instr_(instr) { }	1153 : LDeferredCode(codegen), instr_(instr) { }

1043 virtual void Generate() {	1154 virtual void Generate() {

1044 codegen()->DoDeferredBinaryOpStub(instr_, Token::DIV);	1155 codegen()->DoDeferredBinaryOpStub(instr_, Token::DIV);

1045 }	1156 }

1046 virtual LInstruction* instr() { return instr_; }	1157 virtual LInstruction* instr() { return instr_; }

1047 private:	1158 private:

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1109 __ JumpIfNotSmi(result, &deoptimize);	1220 __ JumpIfNotSmi(result, &deoptimize);

1110 __ SmiUntag(result);	1221 __ SmiUntag(result);

1111 __ b(&done);	1222 __ b(&done);

1112	1223

1113 __ bind(&deoptimize);	1224 __ bind(&deoptimize);

1114 DeoptimizeIf(al, instr->environment());	1225 DeoptimizeIf(al, instr->environment());

1115 __ bind(&done);	1226 __ bind(&done);

1116 }	1227 }

1117	1228

1118	1229

	1230 void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) {

	1231 const Register result = ToRegister(instr->result());

	1232 const Register left = ToRegister(instr->InputAt(0));

	1233 const Register remainder = ToRegister(instr->TempAt(0));

	1234 const Register scratch = scratch0();

	1235

	1236 // We only optimize this for division by constants, because the standard

	1237 // integer division routine is usually slower than transitionning to VFP.

	1238 // This could be optimized on processors with SDIV available.

	1239 ASSERT(instr->InputAt(1)->IsConstantOperand());

	1240 int32_t divisor = ToInteger32(LConstantOperand::cast(instr->InputAt(1)));

	1241 EmitSignedIntegerDivisionByConstant(result,

	1242 left,

	1243 divisor,

	1244 remainder,

	1245 scratch,

	1246 instr->environment());

	1247 // We operated a truncating division. Correct the result if necessary.

	1248 __ cmp(remainder, Operand(0));

	1249 __ teq(remainder, Operand(divisor), ne);

	1250 __ sub(result, result, Operand(1), LeaveCC, mi);

	1251 }

	1252

	1253

1119 template<int T>	1254 template<int T>

1120 void LCodeGen::DoDeferredBinaryOpStub(LTemplateInstruction<1, 2, T>* instr,	1255 void LCodeGen::DoDeferredBinaryOpStub(LTemplateInstruction<1, 2, T>* instr,

1121 Token::Value op) {	1256 Token::Value op) {

1122 Register left = ToRegister(instr->InputAt(0));	1257 Register left = ToRegister(instr->InputAt(0));

1123 Register right = ToRegister(instr->InputAt(1));	1258 Register right = ToRegister(instr->InputAt(1));

1124	1259

1125 PushSafepointRegistersScope scope(this, Safepoint::kWithRegistersAndDoubles);	1260 PushSafepointRegistersScope scope(this, Safepoint::kWithRegistersAndDoubles);

1126 // Move left to r1 and right to r0 for the stub call.	1261 // Move left to r1 and right to r0 for the stub call.

1127 if (left.is(r1)) {	1262 if (left.is(r1)) {

1128 __ Move(r0, right);	1263 __ Move(r0, right);

(...skipping 3937 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5066 __ sub(scratch, result, Operand(index, LSL, kPointerSizeLog2 - kSmiTagSize));	5201 __ sub(scratch, result, Operand(index, LSL, kPointerSizeLog2 - kSmiTagSize));

5067 __ ldr(result, FieldMemOperand(scratch,	5202 __ ldr(result, FieldMemOperand(scratch,

5068 FixedArray::kHeaderSize - kPointerSize));	5203 FixedArray::kHeaderSize - kPointerSize));

5069 __ bind(&done);	5204 __ bind(&done);

5070 }	5205 }

5071	5206

5072	5207

5073 #undef __	5208 #undef __

5074	5209

5075 } } // namespace v8::internal	5210 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/arm/lithium-codegen-arm.h ('k') | src/arm/macro-assembler-arm.h » ('j') | src/arm/macro-assembler-arm.h » ('J')