Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: test/cctest/test-assembler-arm.cc

Issue 2739033002: [ARM] Implement more NEON permutation instructions. (Closed)
Patch Set: Martyn's review comments. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | test/cctest/test-disasm-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1302 matching lines...) Expand 10 before | Expand all | Expand 10 after
1313 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5]; 1313 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5];
1314 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4]; 1314 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
1315 uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4]; 1315 uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4];
1316 uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4]; 1316 uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4];
1317 float vrecpe[4], vrecps[4], vrsqrte[4], vrsqrts[4]; 1317 float vrecpe[4], vrecps[4], vrsqrte[4], vrsqrts[4];
1318 float vminf[4], vmaxf[4]; 1318 float vminf[4], vmaxf[4];
1319 uint32_t vtst[4], vbsl[4]; 1319 uint32_t vtst[4], vbsl[4];
1320 uint32_t vext[4]; 1320 uint32_t vext[4];
1321 uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4], 1321 uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4],
1322 vzip32b[4]; 1322 vzip32b[4];
1323 uint32_t vuzp8a[4], vuzp8b[4], vuzp16a[4], vuzp16b[4], vuzp32a[4],
1324 vuzp32b[4];
1323 uint32_t vrev64_32[4], vrev64_16[4], vrev64_8[4]; 1325 uint32_t vrev64_32[4], vrev64_16[4], vrev64_8[4];
1324 uint32_t vrev32_16[4], vrev32_8[4]; 1326 uint32_t vrev32_16[4], vrev32_8[4], vrev16_8[4];
1325 uint32_t vrev16_8[4]; 1327 uint32_t vtrn8a[4], vtrn8b[4], vtrn16a[4], vtrn16b[4], vtrn32a[4],
1328 vtrn32b[4];
1326 uint32_t vtbl[2], vtbx[2]; 1329 uint32_t vtbl[2], vtbx[2];
1327 } T; 1330 } T;
1328 T t; 1331 T t;
1329 1332
1330 // Create a function that accepts &t, and loads, manipulates, and stores 1333 // Create a function that accepts &t, and loads, manipulates, and stores
1331 // the doubles, floats, and SIMD values. 1334 // the doubles, floats, and SIMD values.
1332 Assembler assm(isolate, NULL, 0); 1335 Assembler assm(isolate, NULL, 0);
1333 1336
1334 if (CpuFeatures::IsSupported(NEON)) { 1337 if (CpuFeatures::IsSupported(NEON)) {
1335 CpuFeatureScope scope(&assm, NEON); 1338 CpuFeatureScope scope(&assm, NEON);
(...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after
1860 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1863 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1861 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); 1864 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1862 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); 1865 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1863 __ vmov(q1, q0); 1866 __ vmov(q1, q0);
1864 __ vzip(Neon32, q0, q1); 1867 __ vzip(Neon32, q0, q1);
1865 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32a)))); 1868 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32a))));
1866 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); 1869 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1867 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32b)))); 1870 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32b))));
1868 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1871 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1869 1872
1873 // vuzp.
1874 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1875 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1876 __ vmov(q1, q0);
1877 __ vuzp(Neon8, q0, q1);
1878 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp8a))));
1879 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1880 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp8b))));
1881 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1882 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1883 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1884 __ vmov(q1, q0);
1885 __ vuzp(Neon16, q0, q1);
1886 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp16a))));
1887 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1888 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp16b))));
1889 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1890 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1891 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1892 __ vmov(q1, q0);
1893 __ vuzp(Neon32, q0, q1);
1894 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp32a))));
1895 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1896 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vuzp32b))));
1897 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1898
1899 // vtrn.
1900 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1901 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1902 __ vmov(q1, q0);
1903 __ vtrn(Neon8, q0, q1);
1904 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn8a))));
1905 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1906 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn8b))));
1907 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1908 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1909 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1910 __ vmov(q1, q0);
1911 __ vtrn(Neon16, q0, q1);
1912 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn16a))));
1913 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1914 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn16b))));
1915 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1916 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1917 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1918 __ vmov(q1, q0);
1919 __ vtrn(Neon32, q0, q1);
1920 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn32a))));
1921 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1922 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtrn32b))));
1923 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1924
1870 // vrev64/32/16 1925 // vrev64/32/16
1871 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); 1926 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test))));
1872 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); 1927 __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
1873 __ vrev64(Neon32, q1, q0); 1928 __ vrev64(Neon32, q1, q0);
1874 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev64_32)))); 1929 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev64_32))));
1875 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1930 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1876 __ vrev64(Neon16, q1, q0); 1931 __ vrev64(Neon16, q1, q0);
1877 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev64_16)))); 1932 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev64_16))));
1878 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); 1933 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
1879 __ vrev64(Neon8, q1, q0); 1934 __ vrev64(Neon8, q1, q0);
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after
2067 2122
2068 CHECK_EQ_32X4(vext, 0x06050403u, 0x0a090807u, 0x0e0d0c0bu, 0x0201000fu); 2123 CHECK_EQ_32X4(vext, 0x06050403u, 0x0a090807u, 0x0e0d0c0bu, 0x0201000fu);
2069 2124
2070 CHECK_EQ_32X4(vzip8a, 0x01010000u, 0x03030202u, 0x05050404u, 0x07070606u); 2125 CHECK_EQ_32X4(vzip8a, 0x01010000u, 0x03030202u, 0x05050404u, 0x07070606u);
2071 CHECK_EQ_32X4(vzip8b, 0x09090808u, 0x0b0b0a0au, 0x0d0d0c0cu, 0x0f0f0e0eu); 2126 CHECK_EQ_32X4(vzip8b, 0x09090808u, 0x0b0b0a0au, 0x0d0d0c0cu, 0x0f0f0e0eu);
2072 CHECK_EQ_32X4(vzip16a, 0x01000100u, 0x03020302u, 0x05040504u, 0x07060706u); 2127 CHECK_EQ_32X4(vzip16a, 0x01000100u, 0x03020302u, 0x05040504u, 0x07060706u);
2073 CHECK_EQ_32X4(vzip16b, 0x09080908u, 0x0b0a0b0au, 0x0d0c0d0cu, 0x0f0e0f0eu); 2128 CHECK_EQ_32X4(vzip16b, 0x09080908u, 0x0b0a0b0au, 0x0d0c0d0cu, 0x0f0e0f0eu);
2074 CHECK_EQ_32X4(vzip32a, 0x03020100u, 0x03020100u, 0x07060504u, 0x07060504u); 2129 CHECK_EQ_32X4(vzip32a, 0x03020100u, 0x03020100u, 0x07060504u, 0x07060504u);
2075 CHECK_EQ_32X4(vzip32b, 0x0b0a0908u, 0x0b0a0908u, 0x0f0e0d0cu, 0x0f0e0d0cu); 2130 CHECK_EQ_32X4(vzip32b, 0x0b0a0908u, 0x0b0a0908u, 0x0f0e0d0cu, 0x0f0e0d0cu);
2076 2131
2132 CHECK_EQ_32X4(vuzp8a, 0x06040200u, 0x0e0c0a08u, 0x06040200u, 0x0e0c0a08u);
2133 CHECK_EQ_32X4(vuzp8b, 0x07050301u, 0x0f0d0b09u, 0x07050301u, 0x0f0d0b09u);
2134 CHECK_EQ_32X4(vuzp16a, 0x05040100u, 0x0d0c0908u, 0x05040100u, 0x0d0c0908u);
2135 CHECK_EQ_32X4(vuzp16b, 0x07060302u, 0x0f0e0b0au, 0x07060302u, 0x0f0e0b0au);
2136 CHECK_EQ_32X4(vuzp32a, 0x03020100u, 0x0b0a0908u, 0x03020100u, 0x0b0a0908u);
2137 CHECK_EQ_32X4(vuzp32b, 0x07060504u, 0x0f0e0d0cu, 0x07060504u, 0x0f0e0d0cu);
2138
2139 CHECK_EQ_32X4(vtrn8a, 0x02020000u, 0x06060404u, 0x0a0a0808u, 0x0e0e0c0cu);
2140 CHECK_EQ_32X4(vtrn8b, 0x03030101u, 0x07070505u, 0x0b0b0909u, 0x0f0f0d0du);
2141 CHECK_EQ_32X4(vtrn16a, 0x01000100u, 0x05040504u, 0x09080908u, 0x0d0c0d0cu);
2142 CHECK_EQ_32X4(vtrn16b, 0x03020302u, 0x07060706u, 0x0b0a0b0au, 0x0f0e0f0eu);
2143 CHECK_EQ_32X4(vtrn32a, 0x03020100u, 0x03020100u, 0x0b0a0908u, 0x0b0a0908u);
2144 CHECK_EQ_32X4(vtrn32b, 0x07060504u, 0x07060504u, 0x0f0e0d0cu, 0x0f0e0d0cu);
2145
2077 // src: 0 1 2 3 4 5 6 7 8 9 a b c d e f (little endian) 2146 // src: 0 1 2 3 4 5 6 7 8 9 a b c d e f (little endian)
2078 CHECK_EQ_32X4(vrev64_32, 0x07060504u, 0x03020100u, 0x0f0e0d0cu, 2147 CHECK_EQ_32X4(vrev64_32, 0x07060504u, 0x03020100u, 0x0f0e0d0cu,
2079 0x0b0a0908u); 2148 0x0b0a0908u);
2080 CHECK_EQ_32X4(vrev64_16, 0x05040706u, 0x01000302u, 0x0d0c0f0eu, 2149 CHECK_EQ_32X4(vrev64_16, 0x05040706u, 0x01000302u, 0x0d0c0f0eu,
2081 0x09080b0au); 2150 0x09080b0au);
2082 CHECK_EQ_32X4(vrev64_8, 0x04050607u, 0x00010203u, 0x0c0d0e0fu, 0x08090a0bu); 2151 CHECK_EQ_32X4(vrev64_8, 0x04050607u, 0x00010203u, 0x0c0d0e0fu, 0x08090a0bu);
2083 CHECK_EQ_32X4(vrev32_16, 0x01000302u, 0x05040706u, 0x09080b0au, 2152 CHECK_EQ_32X4(vrev32_16, 0x01000302u, 0x05040706u, 0x09080b0au,
2084 0x0d0c0f0eu); 2153 0x0d0c0f0eu);
2085 CHECK_EQ_32X4(vrev32_8, 0x00010203u, 0x04050607u, 0x08090a0bu, 0x0c0d0e0fu); 2154 CHECK_EQ_32X4(vrev32_8, 0x00010203u, 0x04050607u, 0x08090a0bu, 0x0c0d0e0fu);
2086 CHECK_EQ_32X4(vrev16_8, 0x02030001u, 0x06070405u, 0x0a0b0809u, 0x0e0f0c0du); 2155 CHECK_EQ_32X4(vrev16_8, 0x02030001u, 0x06070405u, 0x0a0b0809u, 0x0e0f0c0du);
(...skipping 1677 matching lines...) Expand 10 before | Expand all | Expand 10 after
3764 HandleScope scope(isolate); 3833 HandleScope scope(isolate);
3765 3834
3766 Assembler assm(isolate, NULL, 0); 3835 Assembler assm(isolate, NULL, 0);
3767 __ mov(r0, Operand(isolate->factory()->infinity_value())); 3836 __ mov(r0, Operand(isolate->factory()->infinity_value()));
3768 __ BlockConstPoolFor(1019); 3837 __ BlockConstPoolFor(1019);
3769 for (int i = 0; i < 1019; ++i) __ nop(); 3838 for (int i = 0; i < 1019; ++i) __ nop();
3770 __ vldr(d0, MemOperand(r0, 0)); 3839 __ vldr(d0, MemOperand(r0, 0));
3771 } 3840 }
3772 3841
3773 #undef __ 3842 #undef __
OLDNEW
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | test/cctest/test-disasm-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698