| Index: test/cctest/test-assembler-arm.cc
|
| diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc
|
| index ea1d0c2a2cc5915ef9249c44e32418909d02c85b..44b35f8d46139f1eb3fb8c576ff9845371257928 100644
|
| --- a/test/cctest/test-assembler-arm.cc
|
| +++ b/test/cctest/test-assembler-arm.cc
|
| @@ -1298,6 +1298,7 @@ TEST(15) {
|
| uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4];
|
| uint32_t veor[4], vand[4], vorr[4];
|
| float vdupf[4], vaddf[4], vpaddf[2], vsubf[4], vmulf[4];
|
| + uint32_t vdupf_16[2], vdupf_8[4];
|
| uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
|
| uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
|
| uint32_t vpadd_i8[2], vpadd_i16[2], vpadd_i32[2];
|
| @@ -1310,6 +1311,7 @@ TEST(15) {
|
| uint32_t vmul8[4], vmul16[4], vmul32[4];
|
| uint32_t vshl8[4], vshl16[4], vshl32[4];
|
| uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[4];
|
| + uint32_t vsli_64[2], vsri_64[2], vsli_32[2], vsri_32[2];
|
| uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
|
| uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4];
|
| uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4];
|
| @@ -1440,7 +1442,7 @@ TEST(15) {
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_u32))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
|
|
| - // vdup (integer).
|
| + // vdup (from register).
|
| __ mov(r4, Operand(0xa));
|
| __ vdup(Neon8, q0, r4);
|
| __ vdup(Neon16, q1, r4);
|
| @@ -1452,11 +1454,16 @@ TEST(15) {
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup32))));
|
| __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
|
|
| - // vdup (float).
|
| + // vdup (from scalar).
|
| __ vmov(s0, -1.0);
|
| - __ vdup(q0, s0);
|
| + __ vdup(Neon32, q1, d0, 0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdupf))));
|
| - __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
|
| + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| + __ vdup(Neon16, d2, d0, 1);
|
| + __ vstr(d2, r0, offsetof(T, vdupf_16));
|
| + __ vdup(Neon8, q1, d0, 3);
|
| + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdupf_8))));
|
| + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
|
|
| // vabs (float).
|
| __ vmov(s0, -1.0);
|
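The new vdup (from scalar) overload replicates one lane of a doubleword register across every element of the destination; s0 aliases the low half of d0, so the -1.0f just written is 32-bit lane 0. A minimal C++ model of the lane-duplication semantics exercised here (an illustrative helper, not V8 code; it assumes a little-endian host, matching ARM's lane order):

    #include <cstdint>
    #include <cstring>

    // Replicate lane `index` of a 64-bit source across a 128-bit
    // destination with Elem-sized lanes (Elem = uint8_t/uint16_t/uint32_t).
    // Lane indices count from the least-significant end (little-endian).
    template <typename Elem>
    void VdupScalarModel(uint64_t src, int index,
                         Elem (&dst)[16 / sizeof(Elem)]) {
      Elem lane;
      std::memcpy(&lane,
                  reinterpret_cast<const char*>(&src) + index * sizeof(Elem),
                  sizeof(Elem));
      for (auto& d : dst) d = lane;  // splat the selected lane everywhere
    }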
| @@ -1525,24 +1532,24 @@ TEST(15) {
|
|
|
| // vmin (float).
|
| __ vmov(s4, 2.0);
|
| - __ vdup(q0, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| __ vmov(s4, 1.0);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vmin(q1, q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vminf))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vmax (float).
|
| __ vmov(s4, 2.0);
|
| - __ vdup(q0, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| __ vmov(s4, 1.0);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vmax(q1, q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vadd (float).
|
| __ vmov(s4, 1.0);
|
| - __ vdup(q0, s4);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vadd(q1, q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
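These rewrites lean on the VFP/NEON register overlap: s4 and s5 occupy the low and high halves of d2, so after __ vmov(s4, 2.0) the constant sits in 32-bit lane 0 of d2, which is exactly the lane vdup(Neon32, qN, d2, 0) replicates. The vcge block further down uses the same aliasing, selecting s3 as lane 1 of d1.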
| @@ -1555,51 +1562,51 @@ TEST(15) {
|
| __ vstr(d2, r0, offsetof(T, vpaddf));
|
| // vsub (float).
|
| __ vmov(s4, 2.0);
|
| - __ vdup(q0, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| __ vmov(s4, 1.0);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vsub(q1, q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vmul (float).
|
| __ vmov(s4, 2.0);
|
| - __ vdup(q0, s4);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vmul(q1, q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmulf))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vrecpe.
|
| __ vmov(s4, 2.0);
|
| - __ vdup(q0, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| __ vrecpe(q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecpe))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vrecps.
|
| __ vmov(s4, 2.0);
|
| - __ vdup(q0, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| __ vmov(s4, 1.5);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vrecps(q1, q0, q1);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecps))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vrsqrte.
|
| __ vmov(s4, 4.0);
|
| - __ vdup(q0, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| __ vrsqrte(q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrte))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vrsqrts.
|
| __ vmov(s4, 2.0);
|
| - __ vdup(q0, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| __ vmov(s4, 2.5);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vrsqrts(q1, q0, q1);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrts))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| // vceq (float).
|
| __ vmov(s4, 1.0);
|
| - __ vdup(q0, s4);
|
| - __ vdup(q1, s4);
|
| + __ vdup(Neon32, q0, d2, 0);
|
| + __ vdup(Neon32, q1, d2, 0);
|
| __ vceq(q1, q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceqf))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
| @@ -1608,7 +1615,7 @@ TEST(15) {
|
| __ vmov(s1, -1.0);
|
| __ vmov(s2, -0.0);
|
| __ vmov(s3, 0.0);
|
| - __ vdup(q1, s3);
|
| + __ vdup(Neon32, q1, d1, 1);
|
| __ vcge(q2, q1, q0);
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgef))));
|
| __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
| @@ -1814,6 +1821,26 @@ TEST(15) {
|
| __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s32))));
|
| __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
|
|
| + // vsli, vsri.
|
| + __ mov(r4, Operand(0xffffffff));
|
| + __ mov(r5, Operand(0x1));
|
| + __ vmov(d0, r4, r5);
|
| + __ vmov(d1, r5, r5);
|
| + __ vsli(Neon64, d1, d0, 32);
|
| + __ vstr(d1, r0, offsetof(T, vsli_64));
|
| + __ vmov(d0, r5, r4);
|
| + __ vmov(d1, r5, r5);
|
| + __ vsri(Neon64, d1, d0, 32);
|
| + __ vstr(d1, r0, offsetof(T, vsri_64));
|
| + __ vmov(d0, r4, r5);
|
| + __ vmov(d1, r5, r5);
|
| + __ vsli(Neon32, d1, d0, 16);
|
| + __ vstr(d1, r0, offsetof(T, vsli_32));
|
| + __ vmov(d0, r5, r4);
|
| + __ vmov(d1, r5, r5);
|
| + __ vsri(Neon32, d1, d0, 16);
|
| + __ vstr(d1, r0, offsetof(T, vsri_32));
|
| +
|
| // vceq.
|
| __ mov(r4, Operand(0x03));
|
| __ vdup(Neon8, q0, r4);
|
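vsli (shift left and insert) and vsri (shift right and insert) merge the shifted source into the destination, preserving whichever destination bits the shifted value does not cover. A reference model of the 64-bit forms, following my reading of the ARM ARM pseudocode (a sketch, not V8 code; shift assumed to be in 1..63):

    #include <cassert>
    #include <cstdint>

    uint64_t Vsli64(uint64_t dst, uint64_t src, int shift) {
      assert(shift > 0 && shift < 64);
      uint64_t keep = (uint64_t{1} << shift) - 1;  // low dst bits survive
      return (src << shift) | (dst & keep);
    }

    uint64_t Vsri64(uint64_t dst, uint64_t src, int shift) {
      assert(shift > 0 && shift < 64);
      uint64_t keep = ~(~uint64_t{0} >> shift);    // high dst bits survive
      return (src >> shift) | (dst & keep);
    }

For the inputs above, Vsli64(0x0000000100000001, 0x00000001ffffffff, 32) gives 0xffffffff00000001, matching the vsli_64 expectation checked later; the Neon32 forms apply the same merge to each 32-bit lane independently.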
| @@ -2107,7 +2134,9 @@ TEST(15) {
|
| CHECK_EQ_SPLAT(vdup8, 0x0a0a0a0au);
|
| CHECK_EQ_SPLAT(vdup16, 0x000a000au);
|
| CHECK_EQ_SPLAT(vdup32, 0x0000000au);
|
| - CHECK_EQ_SPLAT(vdupf, -1.0);
|
| + CHECK_EQ_SPLAT(vdupf, -1.0); // bit pattern is 0xbf800000.
|
| + CHECK_EQ_32X2(vdupf_16, 0xbf80bf80u, 0xbf80bf80u);
|
| + CHECK_EQ_SPLAT(vdupf_8, 0xbfbfbfbfu);
|
|
|
| // src: [-1, -1, 1, 1]
|
| CHECK_EQ_32X4(vcvt_s32_f32, -1, -1, 1, 1);
|
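The new vdupf_16 and vdupf_8 expectations follow from the IEEE-754 encoding of -1.0f: sign 1, biased exponent 0x7f, zero mantissa, i.e. 0xbf800000. With little-endian lane numbering, 16-bit lane 1 is the high halfword 0xbf80 and byte lane 3 is the top byte 0xbf. A quick illustrative derivation (a hypothetical helper, not part of the test):

    #include <cstdint>
    #include <cstring>

    uint32_t SplatByte3(float f) {             // models vdup(Neon8, ..., 3)
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));    // -1.0f -> 0xbf800000
      return (bits >> 24) * 0x01010101u;       // byte lane 3 -> 0xbfbfbfbf
    }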
| @@ -2189,6 +2218,10 @@ TEST(15) {
|
| CHECK_EQ_SPLAT(vshr_s8, 0xc0c0c0c0u);
|
| CHECK_EQ_SPLAT(vshr_u16, 0x00400040u);
|
| CHECK_EQ_SPLAT(vshr_s32, 0xffffc040u);
|
| + CHECK_EQ_32X2(vsli_64, 0x01u, 0xffffffffu);
|
| + CHECK_EQ_32X2(vsri_64, 0xffffffffu, 0x01u);
|
| + CHECK_EQ_32X2(vsli_32, 0xffff0001u, 0x00010001u);
|
| + CHECK_EQ_32X2(vsri_32, 0x00000000u, 0x0000ffffu);
|
| CHECK_EQ_SPLAT(vceq, 0x00ff00ffu);
|
| // [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...]
|
| CHECK_EQ_SPLAT(vcge_s8, 0x00ff00ffu);
|
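Worked lane by lane, the Neon32 shift-and-insert expectations check out: for vsli_32, lane 0 is (0xffffffff << 16) | (0x00000001 & 0xffff) = 0xffff0001 and lane 1 is (0x00000001 << 16) | 0x0001 = 0x00010001; for vsri_32, lane 0 is (0x00000001 >> 16) | (0x00000001 & 0xffff0000) = 0x00000000 and lane 1 is (0xffffffff >> 16) | (0x00000001 & 0xffff0000) = 0x0000ffff.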
| @@ -3840,11 +3873,8 @@ TEST(vswp) {
|
| const uint32_t test_2 = 0x89abcdef;
|
| __ mov(r4, Operand(test_1));
|
| __ mov(r5, Operand(test_2));
|
| - // TODO(bbudge) replace with vdup when implemented.
|
| - __ vmov(d8, r4, r4);
|
| - __ vmov(d9, r4, r4); // q4 = [1.0, 1.0]
|
| - __ vmov(d10, r5, r5);
|
| - __ vmov(d11, r5, r5); // q5 = [-1.0, -1.0]
|
| + __ vdup(Neon32, q4, r4);
|
| + __ vdup(Neon32, q5, r5);
|
| __ vswp(q4, q5);
|
| __ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q4))));
|
| __ vst1(Neon8, NeonListOperand(q4), NeonMemOperand(r6));
|
|
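With the register form of vdup now implemented, each q register is splatted in a single instruction: vdup(Neon32, q4, r4) replicates r4 into all four 32-bit lanes, retiring the pairs of vmovs that the removed TODO was waiting on.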
|