src/arm/lithium-codegen-arm.cc - Issue 9638018: [v8-dev] Optimise Math.floor(x/y) to use integer division for specific divisor....

Side by Side Diff: src/arm/lithium-codegen-arm.cc

Issue 9638018: [v8-dev] Optimise Math.floor(x/y) to use integer division for specific divisor.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 8 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 1016 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1027 DeoptimizeIf(mi, instr->environment());	1027 DeoptimizeIf(mi, instr->environment());

1028 __ bind(&ok);	1028 __ bind(&ok);

1029 // Load the result and we are done.	1029 // Load the result and we are done.

1030 __ mov(result, scratch2);	1030 __ mov(result, scratch2);

1031 }	1031 }

1032	1032

1033 __ bind(&done);	1033 __ bind(&done);

1034 }	1034 }

1035	1035

1036	1036

	1037 void LCodeGen::EmitSignedIntegerDivisionByConstant(

	1038 Register result,

	1039 Register dividend,

	1040 int32_t divisor,

	1041 Register remainder,

	1042 Register scratch,

	1043 LEnvironment* environment) {

	1044 ASSERT(!AreAliased(dividend, scratch, ip));

	1045 ASSERT(LChunkBuilder::HasMagicNumberForDivisor(divisor));

	1046

	1047 uint32_t divisor_abs = abs(divisor);

	1048

	1049 int32_t power_of_2_factor =

	1050 CompilerIntrinsics::CountTrailingZeros(divisor_abs);

	1051

	1052 switch (divisor_abs) {

	1053 case 0:

	1054 DeoptimizeIf(al, environment);

	1055 return;

	1056

	1057 case 1:

	1058 if (divisor > 0) {

	1059 __ Move(result, dividend);

	1060 } else {

	1061 __ rsb(result, dividend, Operand(0), SetCC);

	1062 DeoptimizeIf(vs, environment);

	1063 }

	1064 // Compute the remainder.

	1065 __ mov(remainder, Operand(0));

	1066 return;

	1067

	1068 default:

	1069 if (IsPowerOf2(divisor_abs)) {

	1070 // Branch and condition free code for integer division by a power

	1071 // of two.

	1072 int32_t power = WhichPowerOf2(divisor_abs);

	1073 if (power > 1) {

	1074 __ mov(scratch, Operand(dividend, ASR, power - 1));

	1075 }

	1076 __ add(scratch, dividend, Operand(scratch, LSR, 32 - power));

	1077 __ mov(result, Operand(scratch, ASR, power));

	1078 // Negate if necessary.

	1079 // We don't need to check for overflow because the case '-1' is

	1080 // handled separately.

	1081 if (divisor < 0) {

	1082 ASSERT(divisor != -1);

	1083 __ rsb(result, result, Operand(0));

	1084 }

	1085 // Compute the remainder.

	1086 if (divisor > 0) {

	1087 __ sub(remainder, dividend, Operand(result, LSL, power));

	1088 } else {

	1089 __ add(remainder, dividend, Operand(result, LSL, power));

	1090 }

	1091 return;

	1092 } else {

	1093 // Use magic numbers for a few specific divisors.

	1094 // Details and proofs can be found in:

	1095 // - Hacker's Delight, Henry S. Warren, Jr.

	1096 // - The PowerPC Compiler Writer’s Guide

	1097 // and probably many others.

	1098 //

	1099 // We handle

	1100 // <divisor with magic numbers> * <power of 2>

	1101 // but not

	1102 // <divisor with magic numbers> * <other divisor with magic numbers>

	1103 DivMagicNumbers magic_numbers =

	1104 DivMagicNumberFor(divisor_abs >> power_of_2_factor);

	1105 // Branch and condition free code for integer division by a constant

	1106 // using a magic number multiplication.

	1107 const int32_t M = magic_numbers.M;

	1108 const int32_t s = magic_numbers.s + power_of_2_factor;

	1109

	1110 __ mov(ip, Operand(M));

	1111 __ smull(ip, scratch, dividend, ip);

	1112 if (M < 0) {

	1113 __ add(scratch, scratch, Operand(dividend));

	1114 }

	1115 if (s > 0) {

	1116 __ mov(scratch, Operand(scratch, ASR, s));

	1117 }

	1118 __ add(result, scratch, Operand(dividend, LSR, 31));

	1119 if (divisor < 0) __ rsb(result, result, Operand(0));

	1120 // Compute the remainder.

	1121 __ mov(ip, Operand(divisor));

	1122 // This sequence could be replaced with 'mls' when

	1123 // it gets implemented.

	1124 __ mul(scratch, result, ip);

	1125 __ sub(remainder, dividend, scratch);

	1126 }

	1127 }

	1128 }

	1129

	1130

1037 void LCodeGen::DoDivI(LDivI* instr) {	1131 void LCodeGen::DoDivI(LDivI* instr) {

1038 class DeferredDivI: public LDeferredCode {	1132 class DeferredDivI: public LDeferredCode {

1039 public:	1133 public:

1040 DeferredDivI(LCodeGen* codegen, LDivI* instr)	1134 DeferredDivI(LCodeGen* codegen, LDivI* instr)

1041 : LDeferredCode(codegen), instr_(instr) { }	1135 : LDeferredCode(codegen), instr_(instr) { }

1042 virtual void Generate() {	1136 virtual void Generate() {

1043 codegen()->DoDeferredBinaryOpStub(instr_, Token::DIV);	1137 codegen()->DoDeferredBinaryOpStub(instr_, Token::DIV);

1044 }	1138 }

1045 virtual LInstruction* instr() { return instr_; }	1139 virtual LInstruction* instr() { return instr_; }

1046 private:	1140 private:

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1108 __ JumpIfNotSmi(result, &deoptimize);	1202 __ JumpIfNotSmi(result, &deoptimize);

1109 __ SmiUntag(result);	1203 __ SmiUntag(result);

1110 __ b(&done);	1204 __ b(&done);

1111	1205

1112 __ bind(&deoptimize);	1206 __ bind(&deoptimize);

1113 DeoptimizeIf(al, instr->environment());	1207 DeoptimizeIf(al, instr->environment());

1114 __ bind(&done);	1208 __ bind(&done);

1115 }	1209 }

1116	1210

1117	1211

	1212 void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) {

	1213 const Register result = ToRegister(instr->result());

	1214 const Register left = ToRegister(instr->InputAt(0));

	1215 const Register remainder = ToRegister(instr->TempAt(0));

	1216 const Register scratch = scratch0();

	1217

	1218 // We only optimize this for division by constants, because the standard

	1219 // integer division routine is usually slower than transitioning to VFP.

	1220 // This could be optimized on processors with SDIV available.

	1221 ASSERT(instr->InputAt(1)->IsConstantOperand());

	1222 int32_t divisor = ToInteger32(LConstantOperand::cast(instr->InputAt(1)));

	1223 if (divisor < 0) {

	1224 __ cmp(left, Operand(0));

	1225 DeoptimizeIf(eq, instr->environment());

	1226 }

	1227 EmitSignedIntegerDivisionByConstant(result,

	1228 left,

	1229 divisor,

	1230 remainder,

	1231 scratch,

	1232 instr->environment());

	1233 // We performed a truncating division. Correct the result if necessary.

	1234 __ cmp(remainder, Operand(0));

	1235 __ teq(remainder, Operand(divisor), ne);

	1236 __ sub(result, result, Operand(1), LeaveCC, mi);

	1237 }

	1238

	1239

1118 template<int T>	1240 template<int T>

1119 void LCodeGen::DoDeferredBinaryOpStub(LTemplateInstruction<1, 2, T>* instr,	1241 void LCodeGen::DoDeferredBinaryOpStub(LTemplateInstruction<1, 2, T>* instr,

1120 Token::Value op) {	1242 Token::Value op) {

1121 Register left = ToRegister(instr->InputAt(0));	1243 Register left = ToRegister(instr->InputAt(0));

1122 Register right = ToRegister(instr->InputAt(1));	1244 Register right = ToRegister(instr->InputAt(1));

1123	1245

1124 PushSafepointRegistersScope scope(this, Safepoint::kWithRegistersAndDoubles);	1246 PushSafepointRegistersScope scope(this, Safepoint::kWithRegistersAndDoubles);

1125 // Move left to r1 and right to r0 for the stub call.	1247 // Move left to r1 and right to r0 for the stub call.

1126 if (left.is(r1)) {	1248 if (left.is(r1)) {

1127 __ Move(r0, right);	1249 __ Move(r0, right);

(...skipping 4046 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5174 __ sub(scratch, result, Operand(index, LSL, kPointerSizeLog2 - kSmiTagSize));	5296 __ sub(scratch, result, Operand(index, LSL, kPointerSizeLog2 - kSmiTagSize));

5175 __ ldr(result, FieldMemOperand(scratch,	5297 __ ldr(result, FieldMemOperand(scratch,

5176 FixedArray::kHeaderSize - kPointerSize));	5298 FixedArray::kHeaderSize - kPointerSize));

5177 __ bind(&done);	5299 __ bind(&done);

5178 }	5300 }

5179	5301

5180	5302

5181 #undef __	5303 #undef __

5182	5304

5183 } } // namespace v8::internal	5305 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/arm/lithium-codegen-arm.h ('k') | src/arm/macro-assembler-arm.h » ('j') | no next file with comments »