Index: runtime/vm/assembler_arm_test.cc |
=================================================================== |
--- runtime/vm/assembler_arm_test.cc (revision 25290) |
+++ runtime/vm/assembler_arm_test.cc (working copy) |
@@ -3070,6 +3070,7 @@ |
__ bx(LR); |
} else { |
+ __ LoadSImmediate(S0, 2.0 - 10.0 * 5.0); |
__ bx(LR); |
} |
} |
@@ -3079,7 +3080,7 @@ |
EXPECT(test != NULL); |
typedef float (*Vrecpsqs)(); |
float res = EXECUTE_TEST_CODE_FLOAT(Vrecpsqs, test->entry()); |
- EXPECT_FLOAT_EQ(2 - 10.0 * 5.0, res, 0.0001f); |
+ EXPECT_FLOAT_EQ(2.0 - 10.0 * 5.0, res, 0.0001f); |
} |
@@ -3114,6 +3115,282 @@ |
} |
+static float arm_reciprocal_sqrt_estimate(float a) { |
+ // From the ARM Architecture Reference Manual A2-87. |
+ if (isinf(a) || (abs(a) >= exp2f(126))) return 0.0; |
+ else if (a == 0.0) return INFINITY; |
+ else if (isnan(a)) return a; |
+ |
+ uint32_t a_bits = bit_cast<uint32_t, float>(a); |
+ uint64_t scaled; |
+ if (((a_bits >> 23) & 1) != 0) { |
+ // scaled = '0 01111111101' : operand<22:0> : Zeros(29) |
+ scaled = (static_cast<uint64_t>(0x3fd) << 52) | |
+ ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29); |
+ } else { |
+ // scaled = '0 01111111110' : operand<22:0> : Zeros(29) |
+ scaled = (static_cast<uint64_t>(0x3fe) << 52) | |
+ ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29); |
+ } |
+ // result_exp = (380 - UInt(operand<30:23>) DIV 2; |
+ int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2; |
+ |
+ double scaled_d = bit_cast<double, uint64_t>(scaled); |
+ ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0)); |
+ |
+ double r; |
+ if (scaled_d < 0.5) { |
+ // range 0.25 <= a < 0.5 |
+ |
+ // a in units of 1/512 rounded down. |
+ int32_t q0 = static_cast<int32_t>(scaled_d * 512.0); |
+ // reciprocal root r. |
+ r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); |
+ } else { |
+ // range 0.5 <= a < 1.0 |
+ |
+ // a in units of 1/256 rounded down. |
+ int32_t q1 = static_cast<int32_t>(scaled_d * 256.0); |
+ // reciprocal root r. |
+ r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); |
+ } |
+ // r in units of 1/256 rounded to nearest. |
+ int32_t s = static_cast<int>(256.0 * r + 0.5); |
+ double estimate = static_cast<double>(s) / 256.0; |
+ ASSERT((estimate >= 1.0) && (estimate <= (511.0/256.0))); |
+ |
+ // result = 0 : result_exp<7:0> : estimate<51:29> |
+ int32_t result_bits = ((result_exp & 0xff) << 23) | |
+ ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff); |
+ return bit_cast<float, int32_t>(result_bits); |
+} |
+ |
+ |
+ASSEMBLER_TEST_GENERATE(Vrsqrteqs, assembler) { |
+ if (CPUFeatures::neon_supported()) { |
+ __ LoadSImmediate(S4, 147.0); |
+ __ vmovs(S5, S4); |
+ __ vmovs(S6, S4); |
+ __ vmovs(S7, S4); |
+ |
+ __ vrsqrteqs(Q0, Q1); |
+ |
+ __ bx(LR); |
+ } else { |
+ __ LoadSImmediate(S0, arm_reciprocal_sqrt_estimate(147.0)); |
+ __ bx(LR); |
+ } |
+} |
+ |
+ |
+ASSEMBLER_TEST_RUN(Vrsqrteqs, test) { |
+ EXPECT(test != NULL); |
+ typedef float (*Vrsqrteqs)(); |
+ float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrteqs, test->entry()); |
+ EXPECT_FLOAT_EQ(arm_reciprocal_sqrt_estimate(147.0), res, 0.0001f); |
+} |
+ |
+ |
+ASSEMBLER_TEST_GENERATE(Vrsqrtsqs, assembler) { |
+ if (CPUFeatures::neon_supported()) { |
+ __ LoadSImmediate(S4, 5.0); |
+ __ LoadSImmediate(S5, 2.0); |
+ __ LoadSImmediate(S6, 3.0); |
+ __ LoadSImmediate(S7, 4.0); |
+ |
+ __ LoadSImmediate(S8, 10.0); |
+ __ LoadSImmediate(S9, 1.0); |
+ __ LoadSImmediate(S10, 6.0); |
+ __ LoadSImmediate(S11, 3.0); |
+ |
+ __ vrsqrtsqs(Q0, Q1, Q2); |
+ |
+ __ bx(LR); |
+ } else { |
+ __ LoadSImmediate(S0, (3.0 - 10.0 * 5.0) / 2.0); |
+ __ bx(LR); |
+ } |
+} |
+ |
+ |
+ASSEMBLER_TEST_RUN(Vrsqrtsqs, test) { |
+ EXPECT(test != NULL); |
+ typedef float (*Vrsqrtsqs)(); |
+ float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrtsqs, test->entry()); |
+ EXPECT_FLOAT_EQ((3.0 - 10.0 * 5.0)/2.0, res, 0.0001f); |
+} |
+ |
+ |
+ASSEMBLER_TEST_GENERATE(ReciprocalSqrt, assembler) { |
+ if (CPUFeatures::neon_supported()) { |
+ __ LoadSImmediate(S4, 147000.0); |
+ __ vmovs(S5, S4); |
+ __ vmovs(S6, S4); |
+ __ vmovs(S7, S4); |
+ |
+ // Reciprocal square root estimate. |
+ __ vrsqrteqs(Q0, Q1); |
+ // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2. |
+ // First step. |
+ __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2 |
+ __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2. |
+ __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2 |
+ // Second step. |
+ __ vmulqs(Q2, Q0, Q0); |
+ __ vrsqrtsqs(Q2, Q1, Q2); |
+ __ vmulqs(Q0, Q0, Q2); |
+ |
+ __ bx(LR); |
+ } else { |
+ __ LoadSImmediate(S0, 1.0/sqrt(147000.0)); |
+ __ bx(LR); |
+ } |
+} |
+ |
+ |
+ASSEMBLER_TEST_RUN(ReciprocalSqrt, test) { |
+ EXPECT(test != NULL); |
+ typedef float (*ReciprocalSqrt)(); |
+ float res = EXECUTE_TEST_CODE_FLOAT(ReciprocalSqrt, test->entry()); |
+ EXPECT_FLOAT_EQ(1.0/sqrt(147000.0), res, 0.0001f); |
+} |
+ |
+ |
+ASSEMBLER_TEST_GENERATE(SIMDSqrt, assembler) { |
+ if (CPUFeatures::neon_supported()) { |
+ __ LoadSImmediate(S4, 147000.0); |
+ __ vmovs(S5, S4); |
+ __ vmovs(S6, S4); |
+ __ vmovs(S7, S4); |
+ |
+ // Reciprocal square root estimate. |
+ __ vrsqrteqs(Q0, Q1); |
+ // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2. |
+ // First step. |
+ __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2 |
+ __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2. |
+ __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2 |
+ // Second step. |
+ __ vmulqs(Q2, Q0, Q0); |
+ __ vrsqrtsqs(Q2, Q1, Q2); |
+ __ vmulqs(Q0, Q0, Q2); |
+ |
+ // Reciprocal. |
+ __ vmovq(Q1, Q0); |
+ // Reciprocal estimate. |
+ __ vrecpeqs(Q0, Q1); |
+ // 2 Newton-Raphson steps. |
+ __ vrecpsqs(Q2, Q1, Q0); |
+ __ vmulqs(Q0, Q0, Q2); |
+ __ vrecpsqs(Q2, Q1, Q0); |
+ __ vmulqs(Q0, Q0, Q2); |
+ |
+ __ bx(LR); |
+ } else { |
+ __ LoadSImmediate(S0, sqrt(147000.0)); |
+ __ bx(LR); |
+ } |
+} |
+ |
+ |
+ASSEMBLER_TEST_RUN(SIMDSqrt, test) { |
+ EXPECT(test != NULL); |
+ typedef float (*SIMDSqrt)(); |
+ float res = EXECUTE_TEST_CODE_FLOAT(SIMDSqrt, test->entry()); |
+ EXPECT_FLOAT_EQ(sqrt(147000.0), res, 0.0001f); |
+} |
+ |
+ |
+ASSEMBLER_TEST_GENERATE(SIMDSqrt2, assembler) { |
+ if (CPUFeatures::neon_supported()) { |
+ __ LoadSImmediate(S4, 1.0); |
+ __ LoadSImmediate(S5, 4.0); |
+ __ LoadSImmediate(S6, 9.0); |
+ __ LoadSImmediate(S7, 16.0); |
+ |
+ // Reciprocal square root estimate. |
+ __ vrsqrteqs(Q0, Q1); |
+ // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2. |
+ // First step. |
+ __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2 |
+ __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2. |
+ __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2 |
+ // Second step. |
+ __ vmulqs(Q2, Q0, Q0); |
+ __ vrsqrtsqs(Q2, Q1, Q2); |
+ __ vmulqs(Q0, Q0, Q2); |
+ |
+ // Reciprocal. |
+ __ vmovq(Q1, Q0); |
+ // Reciprocal estimate. |
+ __ vrecpeqs(Q0, Q1); |
+ // 2 Newton-Raphson steps. |
+ __ vrecpsqs(Q2, Q1, Q0); |
+ __ vmulqs(Q0, Q0, Q2); |
+ __ vrecpsqs(Q2, Q1, Q0); |
+ __ vmulqs(Q0, Q0, Q2); |
+ |
+ __ vadds(S0, S0, S1); |
+ __ vadds(S0, S0, S2); |
+ __ vadds(S0, S0, S3); |
+ |
+ __ bx(LR); |
+ } else { |
+ __ LoadSImmediate(S0, 10.0); |
+ __ bx(LR); |
+ } |
+} |
+ |
+ |
+ASSEMBLER_TEST_RUN(SIMDSqrt2, test) { |
+ EXPECT(test != NULL); |
+ typedef float (*SIMDSqrt2)(); |
+ float res = EXECUTE_TEST_CODE_FLOAT(SIMDSqrt2, test->entry()); |
+ EXPECT_FLOAT_EQ(10.0, res, 0.0001f); |
+} |
+ |
+ |
+ASSEMBLER_TEST_GENERATE(SIMDDiv, assembler) { |
+ if (CPUFeatures::neon_supported()) { |
+ __ LoadSImmediate(S4, 1.0); |
+ __ LoadSImmediate(S5, 4.0); |
+ __ LoadSImmediate(S6, 9.0); |
+ __ LoadSImmediate(S7, 16.0); |
+ |
+ __ LoadSImmediate(S12, 4.0); |
+ __ LoadSImmediate(S13, 16.0); |
+ __ LoadSImmediate(S14, 36.0); |
+ __ LoadSImmediate(S15, 64.0); |
+ |
+ // Reciprocal estimate. |
+ __ vrecpeqs(Q0, Q1); |
+ // 2 Newton-Raphson steps. |
+ __ vrecpsqs(Q2, Q1, Q0); |
+ __ vmulqs(Q0, Q0, Q2); |
+ __ vrecpsqs(Q2, Q1, Q0); |
+ __ vmulqs(Q0, Q0, Q2); |
+ |
+ __ vmulqs(Q0, Q3, Q0); |
+ __ vadds(S0, S0, S1); |
+ __ vadds(S0, S0, S2); |
+ __ vadds(S0, S0, S3); |
+ |
+ __ bx(LR); |
+ } else { |
+ __ LoadSImmediate(S0, 16.0); |
+ __ bx(LR); |
+ } |
+} |
+ |
+ |
+ASSEMBLER_TEST_RUN(SIMDDiv, test) { |
+ EXPECT(test != NULL); |
+ typedef float (*SIMDDiv)(); |
+ float res = EXECUTE_TEST_CODE_FLOAT(SIMDDiv, test->entry()); |
+ EXPECT_FLOAT_EQ(16.0, res, 0.0001f); |
+} |
+ |
+ |
// Called from assembler_test.cc. |
// LR: return address. |
// R0: context. |