Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(217)

Side by Side Diff: runtime/vm/assembler_arm_test.cc

Issue 19875002: Adds reciprocal square root SIMD instructions for ARM. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/assembler_arm.cc ('k') | runtime/vm/constants_arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" 5 #include "vm/globals.h"
6 #if defined(TARGET_ARCH_ARM) 6 #if defined(TARGET_ARCH_ARM)
7 7
8 #include "vm/assembler.h" 8 #include "vm/assembler.h"
9 #include "vm/os.h" 9 #include "vm/os.h"
10 #include "vm/unit_test.h" 10 #include "vm/unit_test.h"
(...skipping 3052 matching lines...) Expand 10 before | Expand all | Expand 10 after
3063 3063
3064 __ LoadSImmediate(S8, 10.0); 3064 __ LoadSImmediate(S8, 10.0);
3065 __ LoadSImmediate(S9, 1.0); 3065 __ LoadSImmediate(S9, 1.0);
3066 __ LoadSImmediate(S10, 6.0); 3066 __ LoadSImmediate(S10, 6.0);
3067 __ LoadSImmediate(S11, 3.0); 3067 __ LoadSImmediate(S11, 3.0);
3068 3068
3069 __ vrecpsqs(Q0, Q1, Q2); 3069 __ vrecpsqs(Q0, Q1, Q2);
3070 3070
3071 __ bx(LR); 3071 __ bx(LR);
3072 } else { 3072 } else {
3073 __ LoadSImmediate(S0, 2.0 - 10.0 * 5.0);
3073 __ bx(LR); 3074 __ bx(LR);
3074 } 3075 }
3075 } 3076 }
3076 3077
3077 3078
3078 ASSEMBLER_TEST_RUN(Vrecpsqs, test) { 3079 ASSEMBLER_TEST_RUN(Vrecpsqs, test) {
3079 EXPECT(test != NULL); 3080 EXPECT(test != NULL);
3080 typedef float (*Vrecpsqs)(); 3081 typedef float (*Vrecpsqs)();
3081 float res = EXECUTE_TEST_CODE_FLOAT(Vrecpsqs, test->entry()); 3082 float res = EXECUTE_TEST_CODE_FLOAT(Vrecpsqs, test->entry());
3082 EXPECT_FLOAT_EQ(2 - 10.0 * 5.0, res, 0.0001f); 3083 EXPECT_FLOAT_EQ(2.0 - 10.0 * 5.0, res, 0.0001f);
3083 } 3084 }
3084 3085
3085 3086
3086 ASSEMBLER_TEST_GENERATE(Reciprocal, assembler) { 3087 ASSEMBLER_TEST_GENERATE(Reciprocal, assembler) {
3087 if (CPUFeatures::neon_supported()) { 3088 if (CPUFeatures::neon_supported()) {
3088 __ LoadSImmediate(S4, 147000.0); 3089 __ LoadSImmediate(S4, 147000.0);
3089 __ vmovs(S5, S4); 3090 __ vmovs(S5, S4);
3090 __ vmovs(S6, S4); 3091 __ vmovs(S6, S4);
3091 __ vmovs(S7, S4); 3092 __ vmovs(S7, S4);
3092 3093
(...skipping 14 matching lines...) Expand all
3107 3108
3108 3109
3109 ASSEMBLER_TEST_RUN(Reciprocal, test) { 3110 ASSEMBLER_TEST_RUN(Reciprocal, test) {
3110 EXPECT(test != NULL); 3111 EXPECT(test != NULL);
3111 typedef float (*Reciprocal)(); 3112 typedef float (*Reciprocal)();
3112 float res = EXECUTE_TEST_CODE_FLOAT(Reciprocal, test->entry()); 3113 float res = EXECUTE_TEST_CODE_FLOAT(Reciprocal, test->entry());
3113 EXPECT_FLOAT_EQ(1.0/147000.0, res, 0.0001f); 3114 EXPECT_FLOAT_EQ(1.0/147000.0, res, 0.0001f);
3114 } 3115 }
3115 3116
3116 3117
3118 static float arm_reciprocal_sqrt_estimate(float a) {
3119 // From the ARM Architecture Reference Manual A2-87.
3120 if (isinf(a) || (abs(a) >= exp2f(126))) return 0.0;
3121 else if (a == 0.0) return INFINITY;
3122 else if (isnan(a)) return a;
3123
3124 uint32_t a_bits = bit_cast<uint32_t, float>(a);
3125 uint64_t scaled;
3126 if (((a_bits >> 23) & 1) != 0) {
3127 // scaled = '0 01111111101' : operand<22:0> : Zeros(29)
3128 scaled = (static_cast<uint64_t>(0x3fd) << 52) |
3129 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
3130 } else {
3131 // scaled = '0 01111111110' : operand<22:0> : Zeros(29)
3132 scaled = (static_cast<uint64_t>(0x3fe) << 52) |
3133 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);
3134 }
3135 // result_exp = (380 - UInt(operand<30:23>) DIV 2;
3136 int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;
3137
3138 double scaled_d = bit_cast<double, uint64_t>(scaled);
3139 ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));
3140
3141 double r;
3142 if (scaled_d < 0.5) {
3143 // range 0.25 <= a < 0.5
3144
3145 // a in units of 1/512 rounded down.
3146 int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);
3147 // reciprocal root r.
3148 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3149 } else {
3150 // range 0.5 <= a < 1.0
3151
3152 // a in units of 1/256 rounded down.
3153 int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);
3154 // reciprocal root r.
3155 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3156 }
3157 // r in units of 1/256 rounded to nearest.
3158 int32_t s = static_cast<int>(256.0 * r + 0.5);
3159 double estimate = static_cast<double>(s) / 256.0;
3160 ASSERT((estimate >= 1.0) && (estimate <= (511.0/256.0)));
3161
3162 // result = 0 : result_exp<7:0> : estimate<51:29>
3163 int32_t result_bits = ((result_exp & 0xff) << 23) |
3164 ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);
3165 return bit_cast<float, int32_t>(result_bits);
3166 }
3167
3168
3169 ASSEMBLER_TEST_GENERATE(Vrsqrteqs, assembler) {
3170 if (CPUFeatures::neon_supported()) {
3171 __ LoadSImmediate(S4, 147.0);
3172 __ vmovs(S5, S4);
3173 __ vmovs(S6, S4);
3174 __ vmovs(S7, S4);
3175
3176 __ vrsqrteqs(Q0, Q1);
3177
3178 __ bx(LR);
3179 } else {
3180 __ LoadSImmediate(S0, arm_reciprocal_sqrt_estimate(147.0));
3181 __ bx(LR);
3182 }
3183 }
3184
3185
3186 ASSEMBLER_TEST_RUN(Vrsqrteqs, test) {
3187 EXPECT(test != NULL);
3188 typedef float (*Vrsqrteqs)();
3189 float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrteqs, test->entry());
3190 EXPECT_FLOAT_EQ(arm_reciprocal_sqrt_estimate(147.0), res, 0.0001f);
3191 }
3192
3193
3194 ASSEMBLER_TEST_GENERATE(Vrsqrtsqs, assembler) {
3195 if (CPUFeatures::neon_supported()) {
3196 __ LoadSImmediate(S4, 5.0);
3197 __ LoadSImmediate(S5, 2.0);
3198 __ LoadSImmediate(S6, 3.0);
3199 __ LoadSImmediate(S7, 4.0);
3200
3201 __ LoadSImmediate(S8, 10.0);
3202 __ LoadSImmediate(S9, 1.0);
3203 __ LoadSImmediate(S10, 6.0);
3204 __ LoadSImmediate(S11, 3.0);
3205
3206 __ vrsqrtsqs(Q0, Q1, Q2);
3207
3208 __ bx(LR);
3209 } else {
3210 __ LoadSImmediate(S0, (3.0 - 10.0 * 5.0) / 2.0);
3211 __ bx(LR);
3212 }
3213 }
3214
3215
3216 ASSEMBLER_TEST_RUN(Vrsqrtsqs, test) {
3217 EXPECT(test != NULL);
3218 typedef float (*Vrsqrtsqs)();
3219 float res = EXECUTE_TEST_CODE_FLOAT(Vrsqrtsqs, test->entry());
3220 EXPECT_FLOAT_EQ((3.0 - 10.0 * 5.0)/2.0, res, 0.0001f);
3221 }
3222
3223
3224 ASSEMBLER_TEST_GENERATE(ReciprocalSqrt, assembler) {
3225 if (CPUFeatures::neon_supported()) {
3226 __ LoadSImmediate(S4, 147000.0);
3227 __ vmovs(S5, S4);
3228 __ vmovs(S6, S4);
3229 __ vmovs(S7, S4);
3230
3231 // Reciprocal square root estimate.
3232 __ vrsqrteqs(Q0, Q1);
3233 // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2.
3234 // First step.
3235 __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2
3236 __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2.
3237 __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2
3238 // Second step.
3239 __ vmulqs(Q2, Q0, Q0);
3240 __ vrsqrtsqs(Q2, Q1, Q2);
3241 __ vmulqs(Q0, Q0, Q2);
3242
3243 __ bx(LR);
3244 } else {
3245 __ LoadSImmediate(S0, 1.0/sqrt(147000.0));
3246 __ bx(LR);
3247 }
3248 }
3249
3250
3251 ASSEMBLER_TEST_RUN(ReciprocalSqrt, test) {
3252 EXPECT(test != NULL);
3253 typedef float (*ReciprocalSqrt)();
3254 float res = EXECUTE_TEST_CODE_FLOAT(ReciprocalSqrt, test->entry());
3255 EXPECT_FLOAT_EQ(1.0/sqrt(147000.0), res, 0.0001f);
3256 }
3257
3258
3259 ASSEMBLER_TEST_GENERATE(SIMDSqrt, assembler) {
3260 if (CPUFeatures::neon_supported()) {
3261 __ LoadSImmediate(S4, 147000.0);
3262 __ vmovs(S5, S4);
3263 __ vmovs(S6, S4);
3264 __ vmovs(S7, S4);
3265
3266 // Reciprocal square root estimate.
3267 __ vrsqrteqs(Q0, Q1);
3268 // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2.
3269 // First step.
3270 __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2
3271 __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2.
3272 __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2
3273 // Second step.
3274 __ vmulqs(Q2, Q0, Q0);
3275 __ vrsqrtsqs(Q2, Q1, Q2);
3276 __ vmulqs(Q0, Q0, Q2);
3277
3278 // Reciprocal.
3279 __ vmovq(Q1, Q0);
3280 // Reciprocal estimate.
3281 __ vrecpeqs(Q0, Q1);
3282 // 2 Newton-Raphson steps.
3283 __ vrecpsqs(Q2, Q1, Q0);
3284 __ vmulqs(Q0, Q0, Q2);
3285 __ vrecpsqs(Q2, Q1, Q0);
3286 __ vmulqs(Q0, Q0, Q2);
3287
3288 __ bx(LR);
3289 } else {
3290 __ LoadSImmediate(S0, sqrt(147000.0));
3291 __ bx(LR);
3292 }
3293 }
3294
3295
3296 ASSEMBLER_TEST_RUN(SIMDSqrt, test) {
3297 EXPECT(test != NULL);
3298 typedef float (*SIMDSqrt)();
3299 float res = EXECUTE_TEST_CODE_FLOAT(SIMDSqrt, test->entry());
3300 EXPECT_FLOAT_EQ(sqrt(147000.0), res, 0.0001f);
3301 }
3302
3303
3304 ASSEMBLER_TEST_GENERATE(SIMDSqrt2, assembler) {
3305 if (CPUFeatures::neon_supported()) {
3306 __ LoadSImmediate(S4, 1.0);
3307 __ LoadSImmediate(S5, 4.0);
3308 __ LoadSImmediate(S6, 9.0);
3309 __ LoadSImmediate(S7, 16.0);
3310
3311 // Reciprocal square root estimate.
3312 __ vrsqrteqs(Q0, Q1);
3313 // 2 Newton-Raphson steps. xn+1 = xn * (3 - Q1*xn^2) / 2.
3314 // First step.
3315 __ vmulqs(Q2, Q0, Q0); // Q2 <- xn^2
3316 __ vrsqrtsqs(Q2, Q1, Q2); // Q2 <- (3 - Q1*Q2) / 2.
3317 __ vmulqs(Q0, Q0, Q2); // xn+1 <- xn * Q2
3318 // Second step.
3319 __ vmulqs(Q2, Q0, Q0);
3320 __ vrsqrtsqs(Q2, Q1, Q2);
3321 __ vmulqs(Q0, Q0, Q2);
3322
3323 // Reciprocal.
3324 __ vmovq(Q1, Q0);
3325 // Reciprocal estimate.
3326 __ vrecpeqs(Q0, Q1);
3327 // 2 Newton-Raphson steps.
3328 __ vrecpsqs(Q2, Q1, Q0);
3329 __ vmulqs(Q0, Q0, Q2);
3330 __ vrecpsqs(Q2, Q1, Q0);
3331 __ vmulqs(Q0, Q0, Q2);
3332
3333 __ vadds(S0, S0, S1);
3334 __ vadds(S0, S0, S2);
3335 __ vadds(S0, S0, S3);
3336
3337 __ bx(LR);
3338 } else {
3339 __ LoadSImmediate(S0, 10.0);
3340 __ bx(LR);
3341 }
3342 }
3343
3344
3345 ASSEMBLER_TEST_RUN(SIMDSqrt2, test) {
3346 EXPECT(test != NULL);
3347 typedef float (*SIMDSqrt2)();
3348 float res = EXECUTE_TEST_CODE_FLOAT(SIMDSqrt2, test->entry());
3349 EXPECT_FLOAT_EQ(10.0, res, 0.0001f);
3350 }
3351
3352
3353 ASSEMBLER_TEST_GENERATE(SIMDDiv, assembler) {
3354 if (CPUFeatures::neon_supported()) {
3355 __ LoadSImmediate(S4, 1.0);
3356 __ LoadSImmediate(S5, 4.0);
3357 __ LoadSImmediate(S6, 9.0);
3358 __ LoadSImmediate(S7, 16.0);
3359
3360 __ LoadSImmediate(S12, 4.0);
3361 __ LoadSImmediate(S13, 16.0);
3362 __ LoadSImmediate(S14, 36.0);
3363 __ LoadSImmediate(S15, 64.0);
3364
3365 // Reciprocal estimate.
3366 __ vrecpeqs(Q0, Q1);
3367 // 2 Newton-Raphson steps.
3368 __ vrecpsqs(Q2, Q1, Q0);
3369 __ vmulqs(Q0, Q0, Q2);
3370 __ vrecpsqs(Q2, Q1, Q0);
3371 __ vmulqs(Q0, Q0, Q2);
3372
3373 __ vmulqs(Q0, Q3, Q0);
3374 __ vadds(S0, S0, S1);
3375 __ vadds(S0, S0, S2);
3376 __ vadds(S0, S0, S3);
3377
3378 __ bx(LR);
3379 } else {
3380 __ LoadSImmediate(S0, 16.0);
3381 __ bx(LR);
3382 }
3383 }
3384
3385
3386 ASSEMBLER_TEST_RUN(SIMDDiv, test) {
3387 EXPECT(test != NULL);
3388 typedef float (*SIMDDiv)();
3389 float res = EXECUTE_TEST_CODE_FLOAT(SIMDDiv, test->entry());
3390 EXPECT_FLOAT_EQ(16.0, res, 0.0001f);
3391 }
3392
3393
3117 // Called from assembler_test.cc. 3394 // Called from assembler_test.cc.
3118 // LR: return address. 3395 // LR: return address.
3119 // R0: context. 3396 // R0: context.
3120 // R1: value. 3397 // R1: value.
3121 // R2: growable array. 3398 // R2: growable array.
3122 ASSEMBLER_TEST_GENERATE(StoreIntoObject, assembler) { 3399 ASSEMBLER_TEST_GENERATE(StoreIntoObject, assembler) {
3123 __ PushList((1 << CTX) | (1 << LR)); 3400 __ PushList((1 << CTX) | (1 << LR));
3124 __ mov(CTX, ShifterOperand(R0)); 3401 __ mov(CTX, ShifterOperand(R0));
3125 __ StoreIntoObject(R2, 3402 __ StoreIntoObject(R2,
3126 FieldAddress(R2, GrowableObjectArray::data_offset()), 3403 FieldAddress(R2, GrowableObjectArray::data_offset()),
3127 R1); 3404 R1);
3128 __ PopList((1 << CTX) | (1 << LR)); 3405 __ PopList((1 << CTX) | (1 << LR));
3129 __ Ret(); 3406 __ Ret();
3130 } 3407 }
3131 3408
3132 } // namespace dart 3409 } // namespace dart
3133 3410
3134 #endif // defined TARGET_ARCH_ARM 3411 #endif // defined TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « runtime/vm/assembler_arm.cc ('k') | runtime/vm/constants_arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698