OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/globals.h" | 5 #include "vm/globals.h" |
6 #if defined(TARGET_ARCH_ARM) | 6 #if defined(TARGET_ARCH_ARM) |
7 | 7 |
8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
9 #include "vm/os.h" | 9 #include "vm/os.h" |
10 #include "vm/unit_test.h" | 10 #include "vm/unit_test.h" |
(...skipping 3052 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3063 | 3063 |
3064 __ LoadSImmediate(S8, 10.0); | 3064 __ LoadSImmediate(S8, 10.0); |
3065 __ LoadSImmediate(S9, 1.0); | 3065 __ LoadSImmediate(S9, 1.0); |
3066 __ LoadSImmediate(S10, 6.0); | 3066 __ LoadSImmediate(S10, 6.0); |
3067 __ LoadSImmediate(S11, 3.0); | 3067 __ LoadSImmediate(S11, 3.0); |
3068 | 3068 |
3069 __ vrecpsqs(Q0, Q1, Q2); | 3069 __ vrecpsqs(Q0, Q1, Q2); |
3070 | 3070 |
3071 __ bx(LR); | 3071 __ bx(LR); |
3072 } else { | 3072 } else { |
| 3073 __ LoadSImmediate(S0, 2.0 - 10.0 * 5.0); |
3073 __ bx(LR); | 3074 __ bx(LR); |
3074 } | 3075 } |
3075 } | 3076 } |
3076 | 3077 |
3077 | 3078 |
3078 ASSEMBLER_TEST_RUN(Vrecpsqs, test) { | 3079 ASSEMBLER_TEST_RUN(Vrecpsqs, test) { |
3079 EXPECT(test != NULL); | 3080 EXPECT(test != NULL); |
3080 typedef float (*Vrecpsqs)(); | 3081 typedef float (*Vrecpsqs)(); |
3081 float res = EXECUTE_TEST_CODE_FLOAT(Vrecpsqs, test->entry()); | 3082 float res = EXECUTE_TEST_CODE_FLOAT(Vrecpsqs, test->entry()); |
3082 EXPECT_FLOAT_EQ(2 - 10.0 * 5.0, res, 0.0001f); | 3083 EXPECT_FLOAT_EQ(2.0 - 10.0 * 5.0, res, 0.0001f); |
3083 } | 3084 } |
3084 | 3085 |
3085 | 3086 |
3086 ASSEMBLER_TEST_GENERATE(Reciprocal, assembler) { | 3087 ASSEMBLER_TEST_GENERATE(Reciprocal, assembler) { |
3087 if (CPUFeatures::neon_supported()) { | 3088 if (CPUFeatures::neon_supported()) { |
3088 __ LoadSImmediate(S4, 147000.0); | 3089 __ LoadSImmediate(S4, 147000.0); |
3089 __ vmovs(S5, S4); | 3090 __ vmovs(S5, S4); |
3090 __ vmovs(S6, S4); | 3091 __ vmovs(S6, S4); |
3091 __ vmovs(S7, S4); | 3092 __ vmovs(S7, S4); |
3092 | 3093 |
(...skipping 14 matching lines...) Expand all Loading... |
3107 | 3108 |
3108 | 3109 |
3109 ASSEMBLER_TEST_RUN(Reciprocal, test) { | 3110 ASSEMBLER_TEST_RUN(Reciprocal, test) { |
3110 EXPECT(test != NULL); | 3111 EXPECT(test != NULL); |
3111 typedef float (*Reciprocal)(); | 3112 typedef float (*Reciprocal)(); |
3112 float res = EXECUTE_TEST_CODE_FLOAT(Reciprocal, test->entry()); | 3113 float res = EXECUTE_TEST_CODE_FLOAT(Reciprocal, test->entry()); |
3113 EXPECT_FLOAT_EQ(1.0/147000.0, res, 0.0001f); | 3114 EXPECT_FLOAT_EQ(1.0/147000.0, res, 0.0001f); |
3114 } | 3115 } |
3115 | 3116 |
3116 | 3117 |
| 3118 static float arm_reciprocal_sqrt_estimate(float a) { |
| 3119 // From the ARM Architecture Reference Manual A2-87. |
| 3120 if (isinf(a) || (abs(a) >= exp2f(126))) return 0.0; |
| 3121 else if (a == 0.0) return INFINITY; |
| 3122 else if (isnan(a)) return a; |
| 3123 |
| 3124 uint32_t a_bits = bit_cast<uint32_t, float>(a); |
| 3125 uint64_t scaled; |
| 3126 if (((a_bits >> 23) & 1) != 0) { |
| 3127 // scaled = '0 01111111101' : operand<22:0> : Zeros(29) |
| 3128 scaled = (static_cast<uint64_t>(0x3fd) << 52) | |
| 3129 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29); |
| 3130 } else { |
| 3131 // scaled = '0 01111111110' : operand<22:0> : Zeros(29) |
| 3132 scaled = (static_cast<uint64_t>(0x3fe) << 52) | |
| 3133 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29); |
| 3134 } |
| 3135 // result_exp = (380 - UInt(operand<30:23>) DIV 2; |
| 3136 int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2; |
| 3137 |
| 3138 double scaled_d = bit_cast<double, uint64_t>(scaled); |
| 3139 ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0)); |
| 3140 |
| 3141 double r; |
| 3142 if (scaled_d < 0.5) { |
| 3143 // range 0.25 <= a < 0.5 |
| 3144 |
| 3145 // a in units of 1/512 rounded down. |
| 3146 int32_t q0 = static_cast<int32_t>(scaled_d * 512.0); |
| 3147 // reciprocal root r. |
| 3148 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); |
| 3149 } else { |
| 3150 // range 0.5 <= a < 1.0 |
| 3151 |
| 3152 // a in units of 1/256 rounded down. |
| 3153 int32_t q1 = static_cast<int32_t>(scaled_d * 256.0); |
| 3154 // reciprocal root r. |
| 3155 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); |
| 3156 } |
| 3157 // r in units of 1/256 rounded to nearest. |
| 3158 int32_t s = static_cast<int>(256.0 * r + 0.5); |
| 3159 double estimate = static_cast<double>(s) / 256.0; |
| 3160 ASSERT((estimate >= 1.0) && (estimate <= (511.0/256.0))); |
| 3161 |
| 3162 // result = 0 : result_exp<7:0> : estimate<51:29> |
| 3163 int32_t result_bits = ((result_exp & 0xff) << 23) | |
| 3164 ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff); |
| 3165 return bit_cast<float, int32_t>(result_bits); |
| 3166 } |
| 3167 |
| 3168 |
| 3169 ASSEMBLER_TEST_GENERATE(Vrsqrteqs, assembler) { |
| 3170 if (CPUFeatures::neon_supported()) { |
| 3171 __ LoadSImmediate(S4, 147.0); |
| 3172 __ vmovs(S5, S4); |
| 3173 __ vmovs(S6, S4); |
| 3174 __ vmovs(S7, S4); |
| 3175 |
| 3176 __ vrsqrteqs(Q0, Q1); |
| 3177 |
| 3178 __ bx(LR); |
| 3179 } else { |
| 3180 __ LoadSImmediate(S0, arm_reciprocal_sqrt_estimate(147.0)); |
| 3181 __ bx(LR); |
| 3182 } |
| 3183 } |
| 3184 |
| 3185 |
| 3186 ASSEMBLER_TEST_RUN(Vrsqrteqs, test) { |
| 3187 EXPECT(test != NULL); |
| 3188 typedef float (*Vrsqrteqs)(); |
| 3189 float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrteqs, test->entry()); |
| 3190 EXPECT_FLOAT_EQ(arm_reciprocal_sqrt_estimate(147.0), res, 0.0001f); |
| 3191 } |
| 3192 |
| 3193 |
| 3194 ASSEMBLER_TEST_GENERATE(Vrsqrtsqs, assembler) { |
| 3195 if (CPUFeatures::neon_supported()) { |
| 3196 __ LoadSImmediate(S4, 5.0); |
| 3197 __ LoadSImmediate(S5, 2.0); |
| 3198 __ LoadSImmediate(S6, 3.0); |
| 3199 __ LoadSImmediate(S7, 4.0); |
| 3200 |
| 3201 __ LoadSImmediate(S8, 10.0); |
| 3202 __ LoadSImmediate(S9, 1.0); |
| 3203 __ LoadSImmediate(S10, 6.0); |
| 3204 __ LoadSImmediate(S11, 3.0); |
| 3205 |
| 3206 __ vrsqrtsqs(Q0, Q1, Q2); |
| 3207 |
| 3208 __ bx(LR); |
| 3209 } else { |
| 3210 __ LoadSImmediate(S0, (3.0 - 10.0 * 5.0) / 2.0); |
| 3211 __ bx(LR); |
| 3212 } |
| 3213 } |
| 3214 |
| 3215 |
| 3216 ASSEMBLER_TEST_RUN(Vrsqrtsqs, test) { |
| 3217 EXPECT(test != NULL); |
| 3218 typedef float (*Vrsqrtsqs)(); |
| 3219 float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrtsqs, test->entry()); |
| 3220 EXPECT_FLOAT_EQ((3.0 - 10.0 * 5.0)/2.0, res, 0.0001f); |
| 3221 } |
| 3222 |
| 3223 |
| 3224 ASSEMBLER_TEST_GENERATE(ReciprocalSqrt, assembler) { |
| 3225 if (CPUFeatures::neon_supported()) { |
| 3226 __ LoadSImmediate(S4, 147000.0); |
| 3227 __ vmovs(S5, S4); |
| 3228 __ vmovs(S6, S4); |
| 3229 __ vmovs(S7, S4); |
| 3230 |
| 3231 // Reciprocal square root estimate. |
| 3232 __ vrsqrteqs(Q0, Q1); |
| 3233 // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2. |
| 3234 // First step. |
| 3235 __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2 |
| 3236 __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2. |
| 3237 __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2 |
| 3238 // Second step. |
| 3239 __ vmulqs(Q2, Q0, Q0); |
| 3240 __ vrsqrtsqs(Q2, Q1, Q2); |
| 3241 __ vmulqs(Q0, Q0, Q2); |
| 3242 |
| 3243 __ bx(LR); |
| 3244 } else { |
| 3245 __ LoadSImmediate(S0, 1.0/sqrt(147000.0)); |
| 3246 __ bx(LR); |
| 3247 } |
| 3248 } |
| 3249 |
| 3250 |
| 3251 ASSEMBLER_TEST_RUN(ReciprocalSqrt, test) { |
| 3252 EXPECT(test != NULL); |
| 3253 typedef float (*ReciprocalSqrt)(); |
| 3254 float res = EXECUTE_TEST_CODE_FLOAT(ReciprocalSqrt, test->entry()); |
| 3255 EXPECT_FLOAT_EQ(1.0/sqrt(147000.0), res, 0.0001f); |
| 3256 } |
| 3257 |
| 3258 |
| 3259 ASSEMBLER_TEST_GENERATE(SIMDSqrt, assembler) { |
| 3260 if (CPUFeatures::neon_supported()) { |
| 3261 __ LoadSImmediate(S4, 147000.0); |
| 3262 __ vmovs(S5, S4); |
| 3263 __ vmovs(S6, S4); |
| 3264 __ vmovs(S7, S4); |
| 3265 |
| 3266 // Reciprocal square root estimate. |
| 3267 __ vrsqrteqs(Q0, Q1); |
| 3268 // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2. |
| 3269 // First step. |
| 3270 __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2 |
| 3271 __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2. |
| 3272 __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2 |
| 3273 // Second step. |
| 3274 __ vmulqs(Q2, Q0, Q0); |
| 3275 __ vrsqrtsqs(Q2, Q1, Q2); |
| 3276 __ vmulqs(Q0, Q0, Q2); |
| 3277 |
| 3278 // Reciprocal. |
| 3279 __ vmovq(Q1, Q0); |
| 3280 // Reciprocal estimate. |
| 3281 __ vrecpeqs(Q0, Q1); |
| 3282 // 2 Newton-Raphson steps. |
| 3283 __ vrecpsqs(Q2, Q1, Q0); |
| 3284 __ vmulqs(Q0, Q0, Q2); |
| 3285 __ vrecpsqs(Q2, Q1, Q0); |
| 3286 __ vmulqs(Q0, Q0, Q2); |
| 3287 |
| 3288 __ bx(LR); |
| 3289 } else { |
| 3290 __ LoadSImmediate(S0, sqrt(147000.0)); |
| 3291 __ bx(LR); |
| 3292 } |
| 3293 } |
| 3294 |
| 3295 |
| 3296 ASSEMBLER_TEST_RUN(SIMDSqrt, test) { |
| 3297 EXPECT(test != NULL); |
| 3298 typedef float (*SIMDSqrt)(); |
| 3299 float res = EXECUTE_TEST_CODE_FLOAT(SIMDSqrt, test->entry()); |
| 3300 EXPECT_FLOAT_EQ(sqrt(147000.0), res, 0.0001f); |
| 3301 } |
| 3302 |
| 3303 |
| 3304 ASSEMBLER_TEST_GENERATE(SIMDSqrt2, assembler) { |
| 3305 if (CPUFeatures::neon_supported()) { |
| 3306 __ LoadSImmediate(S4, 1.0); |
| 3307 __ LoadSImmediate(S5, 4.0); |
| 3308 __ LoadSImmediate(S6, 9.0); |
| 3309 __ LoadSImmediate(S7, 16.0); |
| 3310 |
| 3311 // Reciprocal square root estimate. |
| 3312 __ vrsqrteqs(Q0, Q1); |
| 3313 // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2. |
| 3314 // First step. |
| 3315 __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2 |
| 3316 __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2. |
| 3317 __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2 |
| 3318 // Second step. |
| 3319 __ vmulqs(Q2, Q0, Q0); |
| 3320 __ vrsqrtsqs(Q2, Q1, Q2); |
| 3321 __ vmulqs(Q0, Q0, Q2); |
| 3322 |
| 3323 // Reciprocal. |
| 3324 __ vmovq(Q1, Q0); |
| 3325 // Reciprocal estimate. |
| 3326 __ vrecpeqs(Q0, Q1); |
| 3327 // 2 Newton-Raphson steps. |
| 3328 __ vrecpsqs(Q2, Q1, Q0); |
| 3329 __ vmulqs(Q0, Q0, Q2); |
| 3330 __ vrecpsqs(Q2, Q1, Q0); |
| 3331 __ vmulqs(Q0, Q0, Q2); |
| 3332 |
| 3333 __ vadds(S0, S0, S1); |
| 3334 __ vadds(S0, S0, S2); |
| 3335 __ vadds(S0, S0, S3); |
| 3336 |
| 3337 __ bx(LR); |
| 3338 } else { |
| 3339 __ LoadSImmediate(S0, 10.0); |
| 3340 __ bx(LR); |
| 3341 } |
| 3342 } |
| 3343 |
| 3344 |
| 3345 ASSEMBLER_TEST_RUN(SIMDSqrt2, test) { |
| 3346 EXPECT(test != NULL); |
| 3347 typedef float (*SIMDSqrt2)(); |
| 3348 float res = EXECUTE_TEST_CODE_FLOAT(SIMDSqrt2, test->entry()); |
| 3349 EXPECT_FLOAT_EQ(10.0, res, 0.0001f); |
| 3350 } |
| 3351 |
| 3352 |
| 3353 ASSEMBLER_TEST_GENERATE(SIMDDiv, assembler) { |
| 3354 if (CPUFeatures::neon_supported()) { |
| 3355 __ LoadSImmediate(S4, 1.0); |
| 3356 __ LoadSImmediate(S5, 4.0); |
| 3357 __ LoadSImmediate(S6, 9.0); |
| 3358 __ LoadSImmediate(S7, 16.0); |
| 3359 |
| 3360 __ LoadSImmediate(S12, 4.0); |
| 3361 __ LoadSImmediate(S13, 16.0); |
| 3362 __ LoadSImmediate(S14, 36.0); |
| 3363 __ LoadSImmediate(S15, 64.0); |
| 3364 |
| 3365 // Reciprocal estimate. |
| 3366 __ vrecpeqs(Q0, Q1); |
| 3367 // 2 Newton-Raphson steps. |
| 3368 __ vrecpsqs(Q2, Q1, Q0); |
| 3369 __ vmulqs(Q0, Q0, Q2); |
| 3370 __ vrecpsqs(Q2, Q1, Q0); |
| 3371 __ vmulqs(Q0, Q0, Q2); |
| 3372 |
| 3373 __ vmulqs(Q0, Q3, Q0); |
| 3374 __ vadds(S0, S0, S1); |
| 3375 __ vadds(S0, S0, S2); |
| 3376 __ vadds(S0, S0, S3); |
| 3377 |
| 3378 __ bx(LR); |
| 3379 } else { |
| 3380 __ LoadSImmediate(S0, 16.0); |
| 3381 __ bx(LR); |
| 3382 } |
| 3383 } |
| 3384 |
| 3385 |
| 3386 ASSEMBLER_TEST_RUN(SIMDDiv, test) { |
| 3387 EXPECT(test != NULL); |
| 3388 typedef float (*SIMDDiv)(); |
| 3389 float res = EXECUTE_TEST_CODE_FLOAT(SIMDDiv, test->entry()); |
| 3390 EXPECT_FLOAT_EQ(16.0, res, 0.0001f); |
| 3391 } |
| 3392 |
| 3393 |
3117 // Called from assembler_test.cc. | 3394 // Called from assembler_test.cc. |
3118 // LR: return address. | 3395 // LR: return address. |
3119 // R0: context. | 3396 // R0: context. |
3120 // R1: value. | 3397 // R1: value. |
3121 // R2: growable array. | 3398 // R2: growable array. |
3122 ASSEMBLER_TEST_GENERATE(StoreIntoObject, assembler) { | 3399 ASSEMBLER_TEST_GENERATE(StoreIntoObject, assembler) { |
3123 __ PushList((1 << CTX) | (1 << LR)); | 3400 __ PushList((1 << CTX) | (1 << LR)); |
3124 __ mov(CTX, ShifterOperand(R0)); | 3401 __ mov(CTX, ShifterOperand(R0)); |
3125 __ StoreIntoObject(R2, | 3402 __ StoreIntoObject(R2, |
3126 FieldAddress(R2, GrowableObjectArray::data_offset()), | 3403 FieldAddress(R2, GrowableObjectArray::data_offset()), |
3127 R1); | 3404 R1); |
3128 __ PopList((1 << CTX) | (1 << LR)); | 3405 __ PopList((1 << CTX) | (1 << LR)); |
3129 __ Ret(); | 3406 __ Ret(); |
3130 } | 3407 } |
3131 | 3408 |
3132 } // namespace dart | 3409 } // namespace dart |
3133 | 3410 |
3134 #endif // defined TARGET_ARCH_ARM | 3411 #endif // defined TARGET_ARCH_ARM |
OLD | NEW |