Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2801183002: [WASM SIMD] Implement primitive shuffles. (Closed)
Patch Set: Fix ARM release build. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/arm/macro-assembler-arm.h" 7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h" 8 #include "src/assembler-inl.h"
9 #include "src/compilation-info.h" 9 #include "src/compilation-info.h"
10 #include "src/compiler/code-generator-impl.h" 10 #include "src/compiler/code-generator-impl.h"
(...skipping 2010 matching lines...) Expand 10 before | Expand all | Expand 10 after
2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); 2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
2022 break; 2022 break;
2023 } 2023 }
2024 case kArmS128Select: { 2024 case kArmS128Select: {
2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. 2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst.
2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); 2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0)));
2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), 2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1),
2028 i.InputSimd128Register(2)); 2028 i.InputSimd128Register(2));
2029 break; 2029 break;
2030 } 2030 }
2031 case kArmS32x4ZipLeft: {
martyn.capewell 2017/04/10 16:46:55 It might be helpful to add a comment to each of th
bbudge 2017/04/10 23:26:47 Here's what this is supposed to do: src0 = 0,1,2,3
2032 Simd128Register dst = i.OutputSimd128Register(),
2033 src1 = i.InputSimd128Register(1);
2034 DCHECK(dst.is(i.InputSimd128Register(0)));
2035 __ vmov(dst.high(), src1.low());
martyn.capewell 2017/04/10 16:46:55 If the assembler supports it, you could use: vdu
2036 __ vzip(Neon32, dst.low(), dst.high());
2037 break;
2038 }
2039 case kArmS32x4ZipRight: {
martyn.capewell 2017/04/10 16:46:55 vdup.32 dst.low, src.high[0] vdup.32 dst.high, s
2040 Simd128Register dst = i.OutputSimd128Register(),
2041 src1 = i.InputSimd128Register(1);
2042 DCHECK(dst.is(i.InputSimd128Register(0)));
2043 __ vmov(dst.low(), src1.high());
2044 __ vzip(Neon32, dst.low(), dst.high());
2045 break;
2046 }
2047 case kArmS32x4UnzipLeft: {
martyn.capewell 2017/04/10 16:46:55 vmovn.i64 dst.low, src vmov dst.high, dst.low
2048 Simd128Register dst = i.OutputSimd128Register(),
2049 src1 = i.InputSimd128Register(1);
2050 DCHECK(dst.is(i.InputSimd128Register(0)));
2051 __ vmov(kScratchQuadReg, src1);
2052 __ vuzp(Neon32, dst, kScratchQuadReg);
2053 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2054 break;
2055 }
2056 case kArmS32x4UnzipRight: {
martyn.capewell 2017/04/10 16:46:55 vshrn.i64 dst.low, src, #32 vmov dst.high, dst.l
2057 Simd128Register dst = i.OutputSimd128Register(),
2058 src1 = i.InputSimd128Register(1);
2059 DCHECK(dst.is(i.InputSimd128Register(0)));
2060 __ vmov(kScratchQuadReg, src1);
2061 __ vuzp(Neon32, kScratchQuadReg, dst);
2062 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2063 break;
2064 }
2065 case kArmS32x4TransposeLeft: {
martyn.capewell 2017/04/10 16:46:55 vdup.32 dst.low, src.low[0] vdup.32 dst.high, sr
2066 Simd128Register dst = i.OutputSimd128Register(),
2067 src1 = i.InputSimd128Register(1);
2068 DCHECK(dst.is(i.InputSimd128Register(0)));
2069 __ vmov(kScratchQuadReg, src1);
2070 __ vtrn(Neon32, dst, kScratchQuadReg);
2071 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2072 break;
2073 }
2074 case kArmS32x4TransposeRight: {
martyn.capewell 2017/04/10 16:46:56 vdup dst.low, src.low[1] vdup dst.high, src.high
2075 Simd128Register dst = i.OutputSimd128Register(),
2076 src1 = i.InputSimd128Register(1);
2077 DCHECK(dst.is(i.InputSimd128Register(0)));
2078 __ vmov(kScratchQuadReg, src1);
2079 __ vtrn(Neon32, kScratchQuadReg, dst);
2080 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2081 break;
2082 }
2083 case kArmS16x8ZipLeft: {
2084 Simd128Register dst = i.OutputSimd128Register(),
2085 src1 = i.InputSimd128Register(1);
2086 DCHECK(dst.is(i.InputSimd128Register(0)));
2087 __ vmov(dst.high(), src1.low());
2088 __ vzip(Neon16, dst.low(), dst.high());
2089 break;
2090 }
2091 case kArmS16x8ZipRight: {
2092 Simd128Register dst = i.OutputSimd128Register(),
2093 src1 = i.InputSimd128Register(1);
2094 DCHECK(dst.is(i.InputSimd128Register(0)));
2095 __ vmov(dst.low(), src1.high());
2096 __ vzip(Neon16, dst.low(), dst.high());
2097 break;
2098 }
2099 case kArmS16x8UnzipLeft: {
martyn.capewell 2017/04/10 16:46:55 vmovn.i32 dst.low, src vmov dst.high, dst.low
2100 Simd128Register dst = i.OutputSimd128Register(),
2101 src1 = i.InputSimd128Register(1);
2102 DCHECK(dst.is(i.InputSimd128Register(0)));
2103 __ vmov(kScratchQuadReg, src1);
2104 __ vuzp(Neon16, dst, kScratchQuadReg);
2105 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2106 break;
2107 }
2108 case kArmS16x8UnzipRight: {
martyn.capewell 2017/04/10 16:46:55 vshrn.i32 dst.low, src, #16 vmov dst.high, dst.l
2109 Simd128Register dst = i.OutputSimd128Register(),
2110 src1 = i.InputSimd128Register(1);
2111 DCHECK(dst.is(i.InputSimd128Register(0)));
2112 __ vmov(kScratchQuadReg, src1);
2113 __ vuzp(Neon16, kScratchQuadReg, dst);
2114 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2115 break;
2116 }
2117 case kArmS16x8TransposeLeft: {
2118 Simd128Register dst = i.OutputSimd128Register(),
2119 src1 = i.InputSimd128Register(1);
2120 DCHECK(dst.is(i.InputSimd128Register(0)));
2121 __ vmov(kScratchQuadReg, src1);
2122 __ vtrn(Neon16, dst, kScratchQuadReg);
2123 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2124 break;
2125 }
2126 case kArmS16x8TransposeRight: {
2127 Simd128Register dst = i.OutputSimd128Register(),
2128 src1 = i.InputSimd128Register(1);
2129 DCHECK(dst.is(i.InputSimd128Register(0)));
2130 __ vmov(kScratchQuadReg, src1);
2131 __ vtrn(Neon16, kScratchQuadReg, dst);
2132 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2133 break;
2134 }
2135 case kArmS8x16ZipLeft: {
2136 Simd128Register dst = i.OutputSimd128Register(),
2137 src1 = i.InputSimd128Register(1);
2138 DCHECK(dst.is(i.InputSimd128Register(0)));
2139 __ vmov(dst.high(), src1.low());
2140 __ vzip(Neon8, dst.low(), dst.high());
2141 break;
2142 }
2143 case kArmS8x16ZipRight: {
2144 Simd128Register dst = i.OutputSimd128Register(),
2145 src1 = i.InputSimd128Register(1);
2146 DCHECK(dst.is(i.InputSimd128Register(0)));
2147 __ vmov(dst.low(), src1.high());
2148 __ vzip(Neon8, dst.low(), dst.high());
2149 break;
2150 }
2151 case kArmS8x16UnzipLeft: {
martyn.capewell 2017/04/10 16:46:55 vmovn.i16 dst.low, src vmov dst.high, dst.low
2152 Simd128Register dst = i.OutputSimd128Register(),
2153 src1 = i.InputSimd128Register(1);
2154 DCHECK(dst.is(i.InputSimd128Register(0)));
2155 __ vmov(kScratchQuadReg, src1);
2156 __ vuzp(Neon8, dst, kScratchQuadReg);
2157 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2158 break;
2159 }
2160 case kArmS8x16UnzipRight: {
martyn.capewell 2017/04/10 16:46:55 vshrn.i16 dst.low, src, #8 vmov dst.high, dst.l
2161 Simd128Register dst = i.OutputSimd128Register(),
2162 src1 = i.InputSimd128Register(1);
2163 DCHECK(dst.is(i.InputSimd128Register(0)));
2164 __ vmov(kScratchQuadReg, src1);
2165 __ vuzp(Neon8, kScratchQuadReg, dst);
2166 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2167 break;
2168 }
2169 case kArmS8x16TransposeLeft: {
2170 Simd128Register dst = i.OutputSimd128Register(),
2171 src1 = i.InputSimd128Register(1);
2172 DCHECK(dst.is(i.InputSimd128Register(0)));
2173 __ vmov(kScratchQuadReg, src1);
2174 __ vtrn(Neon8, dst, kScratchQuadReg);
2175 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2176 break;
2177 }
2178 case kArmS8x16TransposeRight: {
2179 Simd128Register dst = i.OutputSimd128Register(),
2180 src1 = i.InputSimd128Register(1);
2181 DCHECK(dst.is(i.InputSimd128Register(0)));
2182 __ vmov(kScratchQuadReg, src1);
2183 __ vtrn(Neon8, kScratchQuadReg, dst);
2184 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2185 break;
2186 }
2187 case kArmS8x16Concat: {
2188 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
2189 i.InputSimd128Register(1), i.InputInt4(2));
2190 break;
2191 }
2192 case kArmS64x2Reverse: {
martyn.capewell 2017/04/10 16:46:55 vext(dst, src, src, 8);
bbudge 2017/04/10 23:26:47 Nice. However, since we expose VEXT (as S8x16Conca
2193 Simd128Register dst = i.OutputSimd128Register(),
2194 src = i.InputSimd128Register(0);
2195 if (!dst.is(src)) __ vmov(dst, src);
2196 __ vswp(dst.low(), dst.high());
2197 break;
2198 }
2199 case kArmS32x2Reverse: {
2200 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2201 break;
2202 }
2203 case kArmS16x4Reverse: {
2204 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2205 break;
2206 }
2207 case kArmS16x2Reverse: {
2208 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2209 break;
2210 }
2211 case kArmS8x8Reverse: {
2212 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2213 break;
2214 }
2215 case kArmS8x4Reverse: {
2216 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2217 break;
2218 }
2219 case kArmS8x2Reverse: {
2220 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2221 break;
2222 }
2031 case kArmS1x4AnyTrue: { 2223 case kArmS1x4AnyTrue: {
2032 const QwNeonRegister& src = i.InputSimd128Register(0); 2224 const QwNeonRegister& src = i.InputSimd128Register(0);
2033 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); 2225 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
2034 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, 2226 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
2035 kScratchDoubleReg); 2227 kScratchDoubleReg);
2036 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); 2228 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
2037 break; 2229 break;
2038 } 2230 }
2039 case kArmS1x4AllTrue: { 2231 case kArmS1x4AllTrue: {
2040 const QwNeonRegister& src = i.InputSimd128Register(0); 2232 const QwNeonRegister& src = i.InputSimd128Register(0);
(...skipping 790 matching lines...) Expand 10 before | Expand all | Expand 10 after
2831 padding_size -= v8::internal::Assembler::kInstrSize; 3023 padding_size -= v8::internal::Assembler::kInstrSize;
2832 } 3024 }
2833 } 3025 }
2834 } 3026 }
2835 3027
2836 #undef __ 3028 #undef __
2837 3029
2838 } // namespace compiler 3030 } // namespace compiler
2839 } // namespace internal 3031 } // namespace internal
2840 } // namespace v8 3032 } // namespace v8
OLD | NEW
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698