runtime/vm/simulator_arm.cc - Issue 19875002: Adds reciprocal square root SIMD instructions for ARM.

Side by Side Diff: runtime/vm/simulator_arm.cc

Issue 19875002: Adds reciprocal square root SIMD instructions for ARM. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include <math.h> // for isnan.	5 #include <math.h> // for isnan.

6 #include <setjmp.h>	6 #include <setjmp.h>

7 #include <stdlib.h>	7 #include <stdlib.h>

8	8

9 #include "vm/globals.h"	9 #include "vm/globals.h"

10 #if defined(TARGET_ARCH_ARM)	10 #if defined(TARGET_ARCH_ARM)

(...skipping 906 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
917 dregisters_[reg] = bit_cast<int64_t, double>(value);	917 dregisters_[reg] = bit_cast<int64_t, double>(value);

918 }	918 }

919	919

920	920

921 double Simulator::get_dregister(DRegister reg) const {	921 double Simulator::get_dregister(DRegister reg) const {

922 ASSERT((reg >= 0) && (reg < kNumberOfDRegisters));	922 ASSERT((reg >= 0) && (reg < kNumberOfDRegisters));

923 return bit_cast<double, int64_t>(dregisters_[reg]);	923 return bit_cast<double, int64_t>(dregisters_[reg]);

924 }	924 }

925	925

926	926

927 void Simulator::set_qregister(QRegister reg, simd_value_t value) {	927 void Simulator::set_qregister(QRegister reg, const simd_value_t& value) {

928 ASSERT((reg >= 0) && (reg < kNumberOfQRegisters));	928 ASSERT((reg >= 0) && (reg < kNumberOfQRegisters));

929 qregisters_[reg].data_[0] = value.data_[0];	929 qregisters_[reg].data_[0] = value.data_[0];

930 qregisters_[reg].data_[1] = value.data_[1];	930 qregisters_[reg].data_[1] = value.data_[1];

931 qregisters_[reg].data_[2] = value.data_[2];	931 qregisters_[reg].data_[2] = value.data_[2];

932 qregisters_[reg].data_[3] = value.data_[3];	932 qregisters_[reg].data_[3] = value.data_[3];

933 }	933 }

934	934

935	935

936 simd_value_t Simulator::get_qregister(QRegister reg) const {	936 void Simulator::get_qregister(QRegister reg, simd_value_t* value) const {

937 ASSERT((reg >= 0) && (reg < kNumberOfQRegisters));	937 ASSERT((reg >= 0) && (reg < kNumberOfQRegisters));

938 return qregisters_[reg];	938 *value = qregisters_[reg];

939 }	939 }

940	940

941	941

942 void Simulator::set_sregister_bits(SRegister reg, int32_t value) {	942 void Simulator::set_sregister_bits(SRegister reg, int32_t value) {

943 ASSERT((reg >= 0) && (reg < kNumberOfSRegisters));	943 ASSERT((reg >= 0) && (reg < kNumberOfSRegisters));

944 sregisters_[reg] = value;	944 sregisters_[reg] = value;

945 }	945 }

946	946

947	947

948 int32_t Simulator::get_sregister_bits(SRegister reg) const {	948 int32_t Simulator::get_sregister_bits(SRegister reg) const {

(...skipping 1944 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2893 } else {	2893 } else {

2894 UnimplementedInstruction(instr);	2894 UnimplementedInstruction(instr);

2895 }	2895 }

2896 }	2896 }

2897 } else {	2897 } else {

2898 UnimplementedInstruction(instr);	2898 UnimplementedInstruction(instr);

2899 }	2899 }

2900 }	2900 }

2901	2901

2902	2902

	2903 static float arm_reciprocal_sqrt_estimate(float a) {

	2904 // From the ARM Architecture Reference Manual A2-87.

	2905 if (isinf(a) || (abs(a) >= exp2f(126))) return 0.0;

	2906 else if (a == 0.0) return INFINITY;

	2907 else if (isnan(a)) return a;

	2908

	2909 uint32_t a_bits = bit_cast<uint32_t, float>(a);

	2910 uint64_t scaled;

	2911 if (((a_bits >> 23) & 1) != 0) {

	2912 // scaled = '0 01111111101' : operand<22:0> : Zeros(29)

	2913 scaled = (static_cast<uint64_t>(0x3fd) << 52) |

	2914 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);

	2915 } else {

	2916 // scaled = '0 01111111110' : operand<22:0> : Zeros(29)

	2917 scaled = (static_cast<uint64_t>(0x3fe) << 52) |

	2918 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);

	2919 }

	2920 // result_exp = (380 - UInt(operand<30:23>)) DIV 2;

	2921 int32_t result_exp = (380 - ((a_bits >> 23) & 0xff)) / 2;

	2922

	2923 double scaled_d = bit_cast<double, uint64_t>(scaled);

	2924 ASSERT((scaled_d >= 0.25) && (scaled_d < 1.0));

	2925

	2926 double r;

	2927 if (scaled_d < 0.5) {

	2928 // range 0.25 <= a < 0.5

	2929

	2930 // a in units of 1/512 rounded down.

	2931 int32_t q0 = static_cast<int32_t>(scaled_d * 512.0);

	2932 // reciprocal root r.

	2933 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);

	2934 } else {

	2935 // range 0.5 <= a < 1.0

	2936

	2937 // a in units of 1/256 rounded down.

	2938 int32_t q1 = static_cast<int32_t>(scaled_d * 256.0);

	2939 // reciprocal root r.

	2940 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);

	2941 }

	2942 // r in units of 1/256 rounded to nearest.

	2943 int32_t s = static_cast<int>(256.0 * r + 0.5);

	2944 double estimate = static_cast<double>(s) / 256.0;

	2945 ASSERT((estimate >= 1.0) && (estimate <= (511.0/256.0)));

	2946

	2947 // result = 0 : result_exp<7:0> : estimate<51:29>

	2948 int32_t result_bits = ((result_exp & 0xff) << 23) |

	2949 ((bit_cast<uint64_t, double>(estimate) >> 29) & 0x7fffff);

	2950 return bit_cast<float, int32_t>(result_bits);

	2951 }

	2952

	2953

2903 static float arm_recip_estimate(float a) {	2954 static float arm_recip_estimate(float a) {

2904 // From the ARM Architecture Reference Manual A2-85.	2955 // From the ARM Architecture Reference Manual A2-85.

2905 if (isinf(a) || (abs(a) >= exp2f(126))) return 0.0;	2956 if (isinf(a) || (abs(a) >= exp2f(126))) return 0.0;

2906 else if (a == 0.0) return INFINITY;	2957 else if (a == 0.0) return INFINITY;

2907 else if (isnan(a)) return a;	2958 else if (isnan(a)) return a;

2908	2959

2909 uint32_t a_bits = bit_cast<uint32_t, float>(a);	2960 uint32_t a_bits = bit_cast<uint32_t, float>(a);

2910 // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)	2961 // scaled = '0011 1111 1110' : a<22:0> : Zeros(29)

2911 uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |	2962 uint64_t scaled = (static_cast<uint64_t>(0x3fe) << 52) |

2912 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);	2963 ((static_cast<uint64_t>(a_bits) & 0x7fffff) << 29);

(...skipping 23 matching lines...) Expand all Loading...
2936	2987

2937 void Simulator::DecodeSIMDDataProcessing(Instr* instr) {	2988 void Simulator::DecodeSIMDDataProcessing(Instr* instr) {

2938 ASSERT(instr->ConditionField() == kSpecialCondition);	2989 ASSERT(instr->ConditionField() == kSpecialCondition);

2939	2990

2940 if (instr->Bit(6) == 1) {	2991 if (instr->Bit(6) == 1) {

2941 // Q = 1, Using 128-bit Q registers.	2992 // Q = 1, Using 128-bit Q registers.

2942 const QRegister qd = instr->QdField();	2993 const QRegister qd = instr->QdField();

2943 const QRegister qn = instr->QnField();	2994 const QRegister qn = instr->QnField();

2944 const QRegister qm = instr->QmField();	2995 const QRegister qm = instr->QmField();

2945 simd_value_t s8d;	2996 simd_value_t s8d;

2946 simd_value_t s8n = get_qregister(qn);	2997 simd_value_t s8n;

2947 simd_value_t s8m = get_qregister(qm);	2998 simd_value_t s8m;

	2999

	3000 get_qregister(qn, &s8n);

	3001 get_qregister(qm, &s8m);

2948 int8_t* s8d_8 = reinterpret_cast<int8_t*>(&s8d);	3002 int8_t* s8d_8 = reinterpret_cast<int8_t*>(&s8d);

2949 int8_t* s8n_8 = reinterpret_cast<int8_t*>(&s8n);	3003 int8_t* s8n_8 = reinterpret_cast<int8_t*>(&s8n);

2950 int8_t* s8m_8 = reinterpret_cast<int8_t*>(&s8m);	3004 int8_t* s8m_8 = reinterpret_cast<int8_t*>(&s8m);

2951 uint8_t* s8n_u8 = reinterpret_cast<uint8_t*>(&s8n);	3005 uint8_t* s8n_u8 = reinterpret_cast<uint8_t*>(&s8n);

2952 uint8_t* s8m_u8 = reinterpret_cast<uint8_t*>(&s8m);	3006 uint8_t* s8m_u8 = reinterpret_cast<uint8_t*>(&s8m);

2953 int16_t* s8d_16 = reinterpret_cast<int16_t*>(&s8d);	3007 int16_t* s8d_16 = reinterpret_cast<int16_t*>(&s8d);

2954 int16_t* s8n_16 = reinterpret_cast<int16_t*>(&s8n);	3008 int16_t* s8n_16 = reinterpret_cast<int16_t*>(&s8n);

2955 int16_t* s8m_16 = reinterpret_cast<int16_t*>(&s8m);	3009 int16_t* s8m_16 = reinterpret_cast<int16_t*>(&s8m);

2956 uint16_t* s8n_u16 = reinterpret_cast<uint16_t*>(&s8n);	3010 uint16_t* s8n_u16 = reinterpret_cast<uint16_t*>(&s8n);

2957 uint16_t* s8m_u16 = reinterpret_cast<uint16_t*>(&s8m);	3011 uint16_t* s8m_u16 = reinterpret_cast<uint16_t*>(&s8m);

(...skipping 134 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3092 // Format(instr, "vrecpeq 'qd, 'qm");	3146 // Format(instr, "vrecpeq 'qd, 'qm");

3093 for (int i = 0; i < 4; i++) {	3147 for (int i = 0; i < 4; i++) {

3094 s8d.data_[i].f = arm_recip_estimate(s8m.data_[i].f);	3148 s8d.data_[i].f = arm_recip_estimate(s8m.data_[i].f);

3095 }	3149 }

3096 } else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) &&	3150 } else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) &&

3097 (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 0)) {	3151 (instr->Bits(20, 2) == 0) && (instr->Bits(23, 2) == 0)) {

3098 // Format(instr, "vrecpsq 'qd, 'qn, 'qm");	3152 // Format(instr, "vrecpsq 'qd, 'qn, 'qm");

3099 for (int i = 0; i < 4; i++) {	3153 for (int i = 0; i < 4; i++) {

3100 s8d.data_[i].f = 2.0 - (s8n.data_[i].f * s8m.data_[i].f);	3154 s8d.data_[i].f = 2.0 - (s8n.data_[i].f * s8m.data_[i].f);

3101 }	3155 }

	3156 } else if ((instr->Bits(8, 4) == 5) && (instr->Bit(4) == 0) &&

	3157 (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&

	3158 (instr->Bit(7) == 1) && (instr->Bits(16, 4) == 11)) {

	3159 // Format(instr, "vrsqrteqs 'qd, 'qm");

	3160 for (int i = 0; i < 4; i++) {

	3161 s8d.data_[i].f = arm_reciprocal_sqrt_estimate(s8m.data_[i].f);

	3162 }

	3163 } else if ((instr->Bits(8, 4) == 15) && (instr->Bit(4) == 1) &&

	3164 (instr->Bits(20, 2) == 2) && (instr->Bits(23, 2) == 0)) {

	3165 // Format(instr, "vrsqrtsqs 'qd, 'qn, 'qm");

	3166 for (int i = 0; i < 4; i++) {

	3167 s8d.data_[i].f = (3.0 - s8n.data_[i].f * s8m.data_[i].f) / 2.0;

	3168 }

3102 } else if ((instr->Bits(8, 4) == 12) && (instr->Bit(4) == 0) &&	3169 } else if ((instr->Bits(8, 4) == 12) && (instr->Bit(4) == 0) &&

3103 (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&	3170 (instr->Bits(20, 2) == 3) && (instr->Bits(23, 2) == 3) &&

3104 (instr->Bit(7) == 0)) {	3171 (instr->Bit(7) == 0)) {

3105 DRegister dm = instr->DmField();	3172 DRegister dm = instr->DmField();

3106 int64_t dm_value = get_dregister_bits(dm);	3173 int64_t dm_value = get_dregister_bits(dm);

3107 int32_t imm4 = instr->Bits(16, 4);	3174 int32_t imm4 = instr->Bits(16, 4);

3108 int32_t idx;	3175 int32_t idx;

3109 if ((imm4 & 1) != 0) {	3176 if ((imm4 & 1) != 0) {

3110 // Format(instr, "vdupb 'qd, 'dm['imm4_vdup]");	3177 // Format(instr, "vdupb 'qd, 'dm['imm4_vdup]");

3111 int8_t* dm_b = reinterpret_cast<int8_t*>(&dm_value);	3178 int8_t* dm_b = reinterpret_cast<int8_t*>(&dm_value);

(...skipping 424 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3536 set_register(kExceptionObjectReg, bit_cast<int32_t>(raw_exception));	3603 set_register(kExceptionObjectReg, bit_cast<int32_t>(raw_exception));

3537 set_register(kStackTraceObjectReg, bit_cast<int32_t>(raw_stacktrace));	3604 set_register(kStackTraceObjectReg, bit_cast<int32_t>(raw_stacktrace));

3538 buf->Longjmp();	3605 buf->Longjmp();

3539 }	3606 }

3540	3607

3541 } // namespace dart	3608 } // namespace dart

3542	3609

3543 #endif // !defined(HOST_ARCH_ARM)	3610 #endif // !defined(HOST_ARCH_ARM)

3544	3611

3545 #endif // defined TARGET_ARCH_ARM	3612 #endif // defined TARGET_ARCH_ARM

OLD	NEW

« no previous file with comments | « runtime/vm/simulator_arm.h ('k') | no next file » | no next file with comments »