Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(111)

Side by Side Diff: test/cctest/test-assembler-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)
Patch Set: Fix MIPS. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 uint32_t vmovl_s8[4], vmovl_u16[4], vmovl_s32[4]; 1290 uint32_t vmovl_s8[4], vmovl_u16[4], vmovl_s32[4];
1291 uint32_t vqmovn_s8[2], vqmovn_u16[2], vqmovn_s32[2]; 1291 uint32_t vqmovn_s8[2], vqmovn_u16[2], vqmovn_s32[2];
1292 int32_t vcvt_s32_f32[4]; 1292 int32_t vcvt_s32_f32[4];
1293 uint32_t vcvt_u32_f32[4]; 1293 uint32_t vcvt_u32_f32[4];
1294 float vcvt_f32_s32[4], vcvt_f32_u32[4]; 1294 float vcvt_f32_s32[4], vcvt_f32_u32[4];
1295 uint32_t vdup8[4], vdup16[4], vdup32[4]; 1295 uint32_t vdup8[4], vdup16[4], vdup32[4];
1296 float vabsf[4], vnegf[4]; 1296 float vabsf[4], vnegf[4];
1297 uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4]; 1297 uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4];
1298 uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4]; 1298 uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4];
1299 uint32_t veor[4], vand[4], vorr[4]; 1299 uint32_t veor[4], vand[4], vorr[4];
1300 float vdupf[4], vaddf[4], vsubf[4], vmulf[4]; 1300 float vdupf[4], vaddf[4], vpaddf[2], vsubf[4], vmulf[4];
1301 uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4]; 1301 uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
1302 uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4]; 1302 uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
1303 uint32_t vpadd_i8[2], vpadd_i16[2], vpadd_i32[2];
1303 uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2]; 1304 uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2];
1304 uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2]; 1305 uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2];
1305 uint32_t vadd8[4], vadd16[4], vadd32[4]; 1306 uint32_t vadd8[4], vadd16[4], vadd32[4];
1306 uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4]; 1307 uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4];
1307 uint32_t vsub8[4], vsub16[4], vsub32[4]; 1308 uint32_t vsub8[4], vsub16[4], vsub32[4];
1308 uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4]; 1309 uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4];
1309 uint32_t vmul8[4], vmul16[4], vmul32[4]; 1310 uint32_t vmul8[4], vmul16[4], vmul32[4];
1310 uint32_t vshl8[4], vshl16[4], vshl32[5]; 1311 uint32_t vshl8[4], vshl16[4], vshl32[5];
1311 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5]; 1312 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5];
1312 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4]; 1313 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after
1538 __ vmax(q1, q1, q0); 1539 __ vmax(q1, q1, q0);
1539 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf)))); 1540 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf))));
1540 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1541 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1541 // vadd (float). 1542 // vadd (float).
1542 __ vmov(s4, 1.0); 1543 __ vmov(s4, 1.0);
1543 __ vdup(q0, s4); 1544 __ vdup(q0, s4);
1544 __ vdup(q1, s4); 1545 __ vdup(q1, s4);
1545 __ vadd(q1, q1, q0); 1546 __ vadd(q1, q1, q0);
1546 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf)))); 1547 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf))));
1547 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1548 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1549 // vpadd (float).
1550 __ vmov(s4, 1.0);
1551 __ vdup(q0, s4);
1552 __ vpadd(d2, d0, d1);
1553 __ vstr(d2, r0, offsetof(T, vpaddf));
1548 // vsub (float). 1554 // vsub (float).
1549 __ vmov(s4, 2.0); 1555 __ vmov(s4, 2.0);
1550 __ vdup(q0, s4); 1556 __ vdup(q0, s4);
1551 __ vmov(s4, 1.0); 1557 __ vmov(s4, 1.0);
1552 __ vdup(q1, s4); 1558 __ vdup(q1, s4);
1553 __ vsub(q1, q1, q0); 1559 __ vsub(q1, q1, q0);
1554 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf)))); 1560 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf))));
1555 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1561 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1556 // vmul (float). 1562 // vmul (float).
1557 __ vmov(s4, 2.0); 1563 __ vmov(s4, 2.0);
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
1630 __ mov(r4, Operand(0xff)); 1636 __ mov(r4, Operand(0xff));
1631 __ vdup(Neon32, q0, r4); 1637 __ vdup(Neon32, q0, r4);
1632 __ vdup(Neon8, q1, r4); 1638 __ vdup(Neon8, q1, r4);
1633 __ vmin(NeonS32, q2, q0, q1); 1639 __ vmin(NeonS32, q2, q0, q1);
1634 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmin_s32)))); 1640 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmin_s32))));
1635 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); 1641 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
1636 __ vmax(NeonS32, q2, q0, q1); 1642 __ vmax(NeonS32, q2, q0, q1);
1637 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32)))); 1643 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32))));
1638 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); 1644 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
1639 1645
1646 // vpadd integer.
1647 __ mov(r4, Operand(0x03));
1648 __ vdup(Neon16, q0, r4);
1649 __ vdup(Neon8, q1, r4);
1650 __ vpadd(Neon8, d0, d0, d2);
1651 __ vstr(d0, r0, offsetof(T, vpadd_i8));
1652 __ vpadd(Neon16, d0, d0, d2);
1653 __ vstr(d0, r0, offsetof(T, vpadd_i16));
1654 __ vpadd(Neon32, d0, d0, d2);
1655 __ vstr(d0, r0, offsetof(T, vpadd_i32));
1656
1640 // vpmin/vpmax integer. 1657 // vpmin/vpmax integer.
1641 __ mov(r4, Operand(0x03)); 1658 __ mov(r4, Operand(0x03));
1642 __ vdup(Neon16, q0, r4); 1659 __ vdup(Neon16, q0, r4);
1643 __ vdup(Neon8, q1, r4); 1660 __ vdup(Neon8, q1, r4);
1644 __ vpmin(NeonS8, d4, d0, d2); 1661 __ vpmin(NeonS8, d4, d0, d2);
1645 __ vstr(d4, r0, offsetof(T, vpmin_s8)); 1662 __ vstr(d4, r0, offsetof(T, vpmin_s8));
1646 __ vpmax(NeonS8, d4, d0, d2); 1663 __ vpmax(NeonS8, d4, d0, d2);
1647 __ vstr(d4, r0, offsetof(T, vpmax_s8)); 1664 __ vstr(d4, r0, offsetof(T, vpmax_s8));
1648 __ mov(r4, Operand(0xffff)); 1665 __ mov(r4, Operand(0xffff));
1649 __ vdup(Neon32, q0, r4); 1666 __ vdup(Neon32, q0, r4);
(...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after
2108 CHECK_EQ_32X4(vabs_s16, 0x7f7f7f7fu, 0x01010101u, 0x00010001u, 0x7f807f80u); 2125 CHECK_EQ_32X4(vabs_s16, 0x7f7f7f7fu, 0x01010101u, 0x00010001u, 0x7f807f80u);
2109 CHECK_EQ_32X4(vabs_s32, 0x7f7f7f7fu, 0x01010101u, 0x00000001u, 0x7f7f7f80u); 2126 CHECK_EQ_32X4(vabs_s32, 0x7f7f7f7fu, 0x01010101u, 0x00000001u, 0x7f7f7f80u);
2110 CHECK_EQ_32X4(vneg_s8, 0x81818181u, 0xffffffffu, 0x01010101u, 0x80808080u); 2127 CHECK_EQ_32X4(vneg_s8, 0x81818181u, 0xffffffffu, 0x01010101u, 0x80808080u);
2111 CHECK_EQ_32X4(vneg_s16, 0x80818081u, 0xfefffeffu, 0x00010001u, 0x7f807f80u); 2128 CHECK_EQ_32X4(vneg_s16, 0x80818081u, 0xfefffeffu, 0x00010001u, 0x7f807f80u);
2112 CHECK_EQ_32X4(vneg_s32, 0x80808081u, 0xfefefeffu, 0x00000001u, 0x7f7f7f80u); 2129 CHECK_EQ_32X4(vneg_s32, 0x80808081u, 0xfefefeffu, 0x00000001u, 0x7f7f7f80u);
2113 2130
2114 CHECK_EQ_SPLAT(veor, 0x00ff00ffu); 2131 CHECK_EQ_SPLAT(veor, 0x00ff00ffu);
2115 CHECK_EQ_SPLAT(vand, 0x00fe00feu); 2132 CHECK_EQ_SPLAT(vand, 0x00fe00feu);
2116 CHECK_EQ_SPLAT(vorr, 0x00ff00ffu); 2133 CHECK_EQ_SPLAT(vorr, 0x00ff00ffu);
2117 CHECK_EQ_SPLAT(vaddf, 2.0); 2134 CHECK_EQ_SPLAT(vaddf, 2.0);
2135 CHECK_EQ_32X2(vpaddf, 2.0, 2.0);
2118 CHECK_EQ_SPLAT(vminf, 1.0); 2136 CHECK_EQ_SPLAT(vminf, 1.0);
2119 CHECK_EQ_SPLAT(vmaxf, 2.0); 2137 CHECK_EQ_SPLAT(vmaxf, 2.0);
2120 CHECK_EQ_SPLAT(vsubf, -1.0); 2138 CHECK_EQ_SPLAT(vsubf, -1.0);
2121 CHECK_EQ_SPLAT(vmulf, 4.0); 2139 CHECK_EQ_SPLAT(vmulf, 4.0);
2122 CHECK_ESTIMATE_SPLAT(vrecpe, 0.5f, 0.1f); // 1 / 2 2140 CHECK_ESTIMATE_SPLAT(vrecpe, 0.5f, 0.1f); // 1 / 2
2123 CHECK_EQ_SPLAT(vrecps, -1.0f); // 2 - (2 * 1.5) 2141 CHECK_EQ_SPLAT(vrecps, -1.0f); // 2 - (2 * 1.5)
2124 CHECK_ESTIMATE_SPLAT(vrsqrte, 0.5f, 0.1f); // 1 / sqrt(4) 2142 CHECK_ESTIMATE_SPLAT(vrsqrte, 0.5f, 0.1f); // 1 / sqrt(4)
2125 CHECK_EQ_SPLAT(vrsqrts, -1.0f); // (3 - (2 * 2.5)) / 2 2143 CHECK_EQ_SPLAT(vrsqrts, -1.0f); // (3 - (2 * 2.5)) / 2
2126 CHECK_EQ_SPLAT(vceqf, 0xffffffffu); 2144 CHECK_EQ_SPLAT(vceqf, 0xffffffffu);
2127 // [0] >= [-1, 1, -0, 0] 2145 // [0] >= [-1, 1, -0, 0]
2128 CHECK_EQ_32X4(vcgef, 0u, 0xffffffffu, 0xffffffffu, 0xffffffffu); 2146 CHECK_EQ_32X4(vcgef, 0u, 0xffffffffu, 0xffffffffu, 0xffffffffu);
2129 CHECK_EQ_32X4(vcgtf, 0u, 0xffffffffu, 0u, 0u); 2147 CHECK_EQ_32X4(vcgtf, 0u, 0xffffffffu, 0u, 0u);
2130 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...] 2148 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...]
2131 CHECK_EQ_SPLAT(vmin_s8, 0x00030003u); 2149 CHECK_EQ_SPLAT(vmin_s8, 0x00030003u);
2132 CHECK_EQ_SPLAT(vmax_s8, 0x03030303u); 2150 CHECK_EQ_SPLAT(vmax_s8, 0x03030303u);
2133 // [0x00ff, 0x00ff, ...] and [0xffff, 0xffff, ...] 2151 // [0x00ff, 0x00ff, ...] and [0xffff, 0xffff, ...]
2134 CHECK_EQ_SPLAT(vmin_u16, 0x00ff00ffu); 2152 CHECK_EQ_SPLAT(vmin_u16, 0x00ff00ffu);
2135 CHECK_EQ_SPLAT(vmax_u16, 0xffffffffu); 2153 CHECK_EQ_SPLAT(vmax_u16, 0xffffffffu);
2136 // [0x000000ff, 0x000000ff, ...] and [0xffffffff, 0xffffffff, ...] 2154 // [0x000000ff, 0x000000ff, ...] and [0xffffffff, 0xffffffff, ...]
2137 CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu); 2155 CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu);
2138 CHECK_EQ_SPLAT(vmax_s32, 0xffu); 2156 CHECK_EQ_SPLAT(vmax_s32, 0xffu);
2139 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...] 2157 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...]
2158 CHECK_EQ_32X2(vpadd_i8, 0x03030303u, 0x06060606u);
2159 CHECK_EQ_32X2(vpadd_i16, 0x0c0c0606u, 0x06060606u);
2160 CHECK_EQ_32X2(vpadd_i32, 0x12120c0cu, 0x06060606u);
2140 CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u); 2161 CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u);
2141 CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u); 2162 CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u);
2142 // [0, ffff, 0, ffff] and [ffff, ffff] 2163 // [0, ffff, 0, ffff] and [ffff, ffff]
2143 CHECK_EQ_32X2(vpmin_u16, 0x00000000u, 0xffffffffu); 2164 CHECK_EQ_32X2(vpmin_u16, 0x00000000u, 0xffffffffu);
2144 CHECK_EQ_32X2(vpmax_u16, 0xffffffffu, 0xffffffffu); 2165 CHECK_EQ_32X2(vpmax_u16, 0xffffffffu, 0xffffffffu);
2145 // [0x000000ff, 0x00000000u] and [0xffffffff, 0xffffffff, ...] 2166 // [0x000000ff, 0x00000000u] and [0xffffffff, 0xffffffff, ...]
2146 CHECK_EQ_32X2(vpmin_s32, 0x00u, 0xffffffffu); 2167 CHECK_EQ_32X2(vpmin_s32, 0x00u, 0xffffffffu);
2147 CHECK_EQ_32X2(vpmax_s32, 0xffu, 0xffffffffu); 2168 CHECK_EQ_32X2(vpmax_s32, 0xffu, 0xffffffffu);
2148 CHECK_EQ_SPLAT(vadd8, 0x03030303u); 2169 CHECK_EQ_SPLAT(vadd8, 0x03030303u);
2149 CHECK_EQ_SPLAT(vadd16, 0x00030003u); 2170 CHECK_EQ_SPLAT(vadd16, 0x00030003u);
(...skipping 1759 matching lines...) Expand 10 before | Expand all | Expand 10 after
3909 HandleScope scope(isolate); 3930 HandleScope scope(isolate);
3910 3931
3911 Assembler assm(isolate, NULL, 0); 3932 Assembler assm(isolate, NULL, 0);
3912 __ mov(r0, Operand(isolate->factory()->infinity_value())); 3933 __ mov(r0, Operand(isolate->factory()->infinity_value()));
3913 __ BlockConstPoolFor(1019); 3934 __ BlockConstPoolFor(1019);
3914 for (int i = 0; i < 1019; ++i) __ nop(); 3935 for (int i = 0; i < 1019; ++i) __ nop();
3915 __ vldr(d0, MemOperand(r0, 0)); 3936 __ vldr(d0, MemOperand(r0, 0));
3916 } 3937 }
3917 3938
3918 #undef __ 3939 #undef __
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698