Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(718)

Side by Side Diff: test/cctest/test-assembler-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)
Patch Set: Rebase, reformat. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/wasm/wasm-opcodes.cc ('k') | test/cctest/test-disasm-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 uint32_t vmovl_s8[4], vmovl_u16[4], vmovl_s32[4]; 1290 uint32_t vmovl_s8[4], vmovl_u16[4], vmovl_s32[4];
1291 uint32_t vqmovn_s8[2], vqmovn_u16[2], vqmovn_s32[2]; 1291 uint32_t vqmovn_s8[2], vqmovn_u16[2], vqmovn_s32[2];
1292 int32_t vcvt_s32_f32[4]; 1292 int32_t vcvt_s32_f32[4];
1293 uint32_t vcvt_u32_f32[4]; 1293 uint32_t vcvt_u32_f32[4];
1294 float vcvt_f32_s32[4], vcvt_f32_u32[4]; 1294 float vcvt_f32_s32[4], vcvt_f32_u32[4];
1295 uint32_t vdup8[4], vdup16[4], vdup32[4]; 1295 uint32_t vdup8[4], vdup16[4], vdup32[4];
1296 float vabsf[4], vnegf[4]; 1296 float vabsf[4], vnegf[4];
1297 uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4]; 1297 uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4];
1298 uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4]; 1298 uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4];
1299 uint32_t veor[4], vand[4], vorr[4]; 1299 uint32_t veor[4], vand[4], vorr[4];
1300 float vdupf[4], vaddf[4], vsubf[4], vmulf[4]; 1300 float vdupf[4], vaddf[4], vpaddf[2], vsubf[4], vmulf[4];
1301 uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4]; 1301 uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
1302 uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4]; 1302 uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
1303 uint32_t vpadd_i8[2], vpadd_i16[2], vpadd_i32[2];
1303 uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2]; 1304 uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2];
1304 uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2]; 1305 uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2];
1305 uint32_t vadd8[4], vadd16[4], vadd32[4]; 1306 uint32_t vadd8[4], vadd16[4], vadd32[4];
1306 uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4]; 1307 uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4];
1307 uint32_t vsub8[4], vsub16[4], vsub32[4]; 1308 uint32_t vsub8[4], vsub16[4], vsub32[4];
1308 uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4]; 1309 uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4];
1309 uint32_t vmul8[4], vmul16[4], vmul32[4]; 1310 uint32_t vmul8[4], vmul16[4], vmul32[4];
1310 uint32_t vshl8[4], vshl16[4], vshl32[5]; 1311 uint32_t vshl8[4], vshl16[4], vshl32[5];
1311 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5]; 1312 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5];
1312 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4]; 1313 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after
1538 __ vmax(q1, q1, q0); 1539 __ vmax(q1, q1, q0);
1539 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf)))); 1540 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf))));
1540 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1541 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1541 // vadd (float). 1542 // vadd (float).
1542 __ vmov(s4, 1.0); 1543 __ vmov(s4, 1.0);
1543 __ vdup(q0, s4); 1544 __ vdup(q0, s4);
1544 __ vdup(q1, s4); 1545 __ vdup(q1, s4);
1545 __ vadd(q1, q1, q0); 1546 __ vadd(q1, q1, q0);
1546 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf)))); 1547 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf))));
1547 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1548 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1549 // vpadd (float).
1550 __ vmov(s0, 1.0);
1551 __ vmov(s1, 2.0);
1552 __ vmov(s2, 3.0);
1553 __ vmov(s3, 4.0);
1554 __ vpadd(d2, d0, d1);
1555 __ vstr(d2, r0, offsetof(T, vpaddf));
1548 // vsub (float). 1556 // vsub (float).
1549 __ vmov(s4, 2.0); 1557 __ vmov(s4, 2.0);
1550 __ vdup(q0, s4); 1558 __ vdup(q0, s4);
1551 __ vmov(s4, 1.0); 1559 __ vmov(s4, 1.0);
1552 __ vdup(q1, s4); 1560 __ vdup(q1, s4);
1553 __ vsub(q1, q1, q0); 1561 __ vsub(q1, q1, q0);
1554 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf)))); 1562 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf))));
1555 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1563 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1556 // vmul (float). 1564 // vmul (float).
1557 __ vmov(s4, 2.0); 1565 __ vmov(s4, 2.0);
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
1630 __ mov(r4, Operand(0xff)); 1638 __ mov(r4, Operand(0xff));
1631 __ vdup(Neon32, q0, r4); 1639 __ vdup(Neon32, q0, r4);
1632 __ vdup(Neon8, q1, r4); 1640 __ vdup(Neon8, q1, r4);
1633 __ vmin(NeonS32, q2, q0, q1); 1641 __ vmin(NeonS32, q2, q0, q1);
1634 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmin_s32)))); 1642 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmin_s32))));
1635 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); 1643 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
1636 __ vmax(NeonS32, q2, q0, q1); 1644 __ vmax(NeonS32, q2, q0, q1);
1637 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32)))); 1645 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32))));
1638 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); 1646 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
1639 1647
1648 // vpadd integer.
1649 __ mov(r4, Operand(0x03));
1650 __ vdup(Neon16, q0, r4);
1651 __ vdup(Neon8, q1, r4);
1652 __ vpadd(Neon8, d0, d0, d2);
1653 __ vstr(d0, r0, offsetof(T, vpadd_i8));
1654 __ vpadd(Neon16, d0, d0, d2);
1655 __ vstr(d0, r0, offsetof(T, vpadd_i16));
1656 __ vpadd(Neon32, d0, d0, d2);
1657 __ vstr(d0, r0, offsetof(T, vpadd_i32));
1658
1640 // vpmin/vpmax integer. 1659 // vpmin/vpmax integer.
1641 __ mov(r4, Operand(0x03)); 1660 __ mov(r4, Operand(0x03));
1642 __ vdup(Neon16, q0, r4); 1661 __ vdup(Neon16, q0, r4);
1643 __ vdup(Neon8, q1, r4); 1662 __ vdup(Neon8, q1, r4);
1644 __ vpmin(NeonS8, d4, d0, d2); 1663 __ vpmin(NeonS8, d4, d0, d2);
1645 __ vstr(d4, r0, offsetof(T, vpmin_s8)); 1664 __ vstr(d4, r0, offsetof(T, vpmin_s8));
1646 __ vpmax(NeonS8, d4, d0, d2); 1665 __ vpmax(NeonS8, d4, d0, d2);
1647 __ vstr(d4, r0, offsetof(T, vpmax_s8)); 1666 __ vstr(d4, r0, offsetof(T, vpmax_s8));
1648 __ mov(r4, Operand(0xffff)); 1667 __ mov(r4, Operand(0xffff));
1649 __ vdup(Neon32, q0, r4); 1668 __ vdup(Neon32, q0, r4);
(...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after
2108 CHECK_EQ_32X4(vabs_s16, 0x7f7f7f7fu, 0x01010101u, 0x00010001u, 0x7f807f80u); 2127 CHECK_EQ_32X4(vabs_s16, 0x7f7f7f7fu, 0x01010101u, 0x00010001u, 0x7f807f80u);
2109 CHECK_EQ_32X4(vabs_s32, 0x7f7f7f7fu, 0x01010101u, 0x00000001u, 0x7f7f7f80u); 2128 CHECK_EQ_32X4(vabs_s32, 0x7f7f7f7fu, 0x01010101u, 0x00000001u, 0x7f7f7f80u);
2110 CHECK_EQ_32X4(vneg_s8, 0x81818181u, 0xffffffffu, 0x01010101u, 0x80808080u); 2129 CHECK_EQ_32X4(vneg_s8, 0x81818181u, 0xffffffffu, 0x01010101u, 0x80808080u);
2111 CHECK_EQ_32X4(vneg_s16, 0x80818081u, 0xfefffeffu, 0x00010001u, 0x7f807f80u); 2130 CHECK_EQ_32X4(vneg_s16, 0x80818081u, 0xfefffeffu, 0x00010001u, 0x7f807f80u);
2112 CHECK_EQ_32X4(vneg_s32, 0x80808081u, 0xfefefeffu, 0x00000001u, 0x7f7f7f80u); 2131 CHECK_EQ_32X4(vneg_s32, 0x80808081u, 0xfefefeffu, 0x00000001u, 0x7f7f7f80u);
2113 2132
2114 CHECK_EQ_SPLAT(veor, 0x00ff00ffu); 2133 CHECK_EQ_SPLAT(veor, 0x00ff00ffu);
2115 CHECK_EQ_SPLAT(vand, 0x00fe00feu); 2134 CHECK_EQ_SPLAT(vand, 0x00fe00feu);
2116 CHECK_EQ_SPLAT(vorr, 0x00ff00ffu); 2135 CHECK_EQ_SPLAT(vorr, 0x00ff00ffu);
2117 CHECK_EQ_SPLAT(vaddf, 2.0); 2136 CHECK_EQ_SPLAT(vaddf, 2.0);
2137 CHECK_EQ_32X2(vpaddf, 3.0, 7.0);
2118 CHECK_EQ_SPLAT(vminf, 1.0); 2138 CHECK_EQ_SPLAT(vminf, 1.0);
2119 CHECK_EQ_SPLAT(vmaxf, 2.0); 2139 CHECK_EQ_SPLAT(vmaxf, 2.0);
2120 CHECK_EQ_SPLAT(vsubf, -1.0); 2140 CHECK_EQ_SPLAT(vsubf, -1.0);
2121 CHECK_EQ_SPLAT(vmulf, 4.0); 2141 CHECK_EQ_SPLAT(vmulf, 4.0);
2122 CHECK_ESTIMATE_SPLAT(vrecpe, 0.5f, 0.1f); // 1 / 2 2142 CHECK_ESTIMATE_SPLAT(vrecpe, 0.5f, 0.1f); // 1 / 2
2123 CHECK_EQ_SPLAT(vrecps, -1.0f); // 2 - (2 * 1.5) 2143 CHECK_EQ_SPLAT(vrecps, -1.0f); // 2 - (2 * 1.5)
2124 CHECK_ESTIMATE_SPLAT(vrsqrte, 0.5f, 0.1f); // 1 / sqrt(4) 2144 CHECK_ESTIMATE_SPLAT(vrsqrte, 0.5f, 0.1f); // 1 / sqrt(4)
2125 CHECK_EQ_SPLAT(vrsqrts, -1.0f); // (3 - (2 * 2.5)) / 2 2145 CHECK_EQ_SPLAT(vrsqrts, -1.0f); // (3 - (2 * 2.5)) / 2
2126 CHECK_EQ_SPLAT(vceqf, 0xffffffffu); 2146 CHECK_EQ_SPLAT(vceqf, 0xffffffffu);
2127 // [0] >= [-1, 1, -0, 0] 2147 // [0] >= [-1, 1, -0, 0]
2128 CHECK_EQ_32X4(vcgef, 0u, 0xffffffffu, 0xffffffffu, 0xffffffffu); 2148 CHECK_EQ_32X4(vcgef, 0u, 0xffffffffu, 0xffffffffu, 0xffffffffu);
2129 CHECK_EQ_32X4(vcgtf, 0u, 0xffffffffu, 0u, 0u); 2149 CHECK_EQ_32X4(vcgtf, 0u, 0xffffffffu, 0u, 0u);
2130 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...] 2150 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...]
2131 CHECK_EQ_SPLAT(vmin_s8, 0x00030003u); 2151 CHECK_EQ_SPLAT(vmin_s8, 0x00030003u);
2132 CHECK_EQ_SPLAT(vmax_s8, 0x03030303u); 2152 CHECK_EQ_SPLAT(vmax_s8, 0x03030303u);
2133 // [0x00ff, 0x00ff, ...] and [0xffff, 0xffff, ...] 2153 // [0x00ff, 0x00ff, ...] and [0xffff, 0xffff, ...]
2134 CHECK_EQ_SPLAT(vmin_u16, 0x00ff00ffu); 2154 CHECK_EQ_SPLAT(vmin_u16, 0x00ff00ffu);
2135 CHECK_EQ_SPLAT(vmax_u16, 0xffffffffu); 2155 CHECK_EQ_SPLAT(vmax_u16, 0xffffffffu);
2136 // [0x000000ff, 0x000000ff, ...] and [0xffffffff, 0xffffffff, ...] 2156 // [0x000000ff, 0x000000ff, ...] and [0xffffffff, 0xffffffff, ...]
2137 CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu); 2157 CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu);
2138 CHECK_EQ_SPLAT(vmax_s32, 0xffu); 2158 CHECK_EQ_SPLAT(vmax_s32, 0xffu);
2139 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...] 2159 // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...]
2160 CHECK_EQ_32X2(vpadd_i8, 0x03030303u, 0x06060606u);
2161 CHECK_EQ_32X2(vpadd_i16, 0x0c0c0606u, 0x06060606u);
2162 CHECK_EQ_32X2(vpadd_i32, 0x12120c0cu, 0x06060606u);
2140 CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u); 2163 CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u);
2141 CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u); 2164 CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u);
2142 // [0, ffff, 0, ffff] and [ffff, ffff] 2165 // [0, ffff, 0, ffff] and [ffff, ffff]
2143 CHECK_EQ_32X2(vpmin_u16, 0x00000000u, 0xffffffffu); 2166 CHECK_EQ_32X2(vpmin_u16, 0x00000000u, 0xffffffffu);
2144 CHECK_EQ_32X2(vpmax_u16, 0xffffffffu, 0xffffffffu); 2167 CHECK_EQ_32X2(vpmax_u16, 0xffffffffu, 0xffffffffu);
2145 // [0x000000ff, 0x00000000u] and [0xffffffff, 0xffffffff, ...] 2168 // [0x000000ff, 0x00000000u] and [0xffffffff, 0xffffffff, ...]
2146 CHECK_EQ_32X2(vpmin_s32, 0x00u, 0xffffffffu); 2169 CHECK_EQ_32X2(vpmin_s32, 0x00u, 0xffffffffu);
2147 CHECK_EQ_32X2(vpmax_s32, 0xffu, 0xffffffffu); 2170 CHECK_EQ_32X2(vpmax_s32, 0xffu, 0xffffffffu);
2148 CHECK_EQ_SPLAT(vadd8, 0x03030303u); 2171 CHECK_EQ_SPLAT(vadd8, 0x03030303u);
2149 CHECK_EQ_SPLAT(vadd16, 0x00030003u); 2172 CHECK_EQ_SPLAT(vadd16, 0x00030003u);
(...skipping 1759 matching lines...) Expand 10 before | Expand all | Expand 10 after
3909 HandleScope scope(isolate); 3932 HandleScope scope(isolate);
3910 3933
3911 Assembler assm(isolate, NULL, 0); 3934 Assembler assm(isolate, NULL, 0);
3912 __ mov(r0, Operand(isolate->factory()->infinity_value())); 3935 __ mov(r0, Operand(isolate->factory()->infinity_value()));
3913 __ BlockConstPoolFor(1019); 3936 __ BlockConstPoolFor(1019);
3914 for (int i = 0; i < 1019; ++i) __ nop(); 3937 for (int i = 0; i < 1019; ++i) __ nop();
3915 __ vldr(d0, MemOperand(r0, 0)); 3938 __ vldr(d0, MemOperand(r0, 0));
3916 } 3939 }
3917 3940
3918 #undef __ 3941 #undef __
OLDNEW
« no previous file with comments | « src/wasm/wasm-opcodes.cc ('k') | test/cctest/test-disasm-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698