OLD | NEW |
---|---|
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" |
6 | 6 |
7 #include "src/arm/macro-assembler-arm.h" | 7 #include "src/arm/macro-assembler-arm.h" |
8 #include "src/assembler-inl.h" | 8 #include "src/assembler-inl.h" |
9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" |
10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" |
(...skipping 2010 matching lines...) | |
2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
2022 break; | 2022 break; |
2023 } | 2023 } |
2024 case kArmS128Select: { | 2024 case kArmS128Select: { |
2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. | 2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. |
2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); | 2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); |
2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), | 2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), |
2028 i.InputSimd128Register(2)); | 2028 i.InputSimd128Register(2)); |
2029 break; | 2029 break; |
2030 } | 2030 } |
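For reference, a minimal scalar model of the `vbsl` bitwise select above (an illustration only; the helper name is hypothetical): each result bit comes from the first operand where the mask bit is set, and from the second operand otherwise. Because `vbsl` overwrites the mask register, the mask must be allocated as the destination, hence the DefineSameAsFirst check.

```cpp
#include <cstdint>

// Hypothetical scalar model of vbsl on one 32-bit word:
// dst = (mask & a) | (~mask & b), applied bit by bit.
uint32_t S128SelectWord(uint32_t mask, uint32_t a, uint32_t b) {
  return (mask & a) | (~mask & b);
}
```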
2031 case kArmS32x4ZipLeft: { | |
martyn.capewell
2017/04/10 16:46:55
It might be helpful to add a comment to each of these cases.
bbudge
2017/04/10 23:26:47
Here's what this is supposed to do:
src0 = 0,1,2,3
src1 = 4,5,6,7
dst = 0,4,1,5
| |
2032 Simd128Register dst = i.OutputSimd128Register(), | |
2033 src1 = i.InputSimd128Register(1); | |
2034 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2035 __ vmov(dst.high(), src1.low()); | |
martyn.capewell
2017/04/10 16:46:55
If the assembler supports it, you could use:
vdup.32 dst.low, src.low[0]
vdup.32 dst.high, src.low[1]
| |
2036 __ vzip(Neon32, dst.low(), dst.high()); | |
2037 break; | |
2038 } | |
2039 case kArmS32x4ZipRight: { | |
martyn.capewell
2017/04/10 16:46:55
vdup.32 dst.low, src.high[0]
vdup.32 dst.high, src.high[1]
| |
2040 Simd128Register dst = i.OutputSimd128Register(), | |
2041 src1 = i.InputSimd128Register(1); | |
2042 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2043 __ vmov(dst.low(), src1.high()); | |
2044 __ vzip(Neon32, dst.low(), dst.high()); | |
2045 break; | |
2046 } | |
2047 case kArmS32x4UnzipLeft: { | |
martyn.capewell
2017/04/10 16:46:55
vmovn.i64 dst.low, src
vmov dst.high, dst.low
| |
2048 Simd128Register dst = i.OutputSimd128Register(), | |
2049 src1 = i.InputSimd128Register(1); | |
2050 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2051 __ vmov(kScratchQuadReg, src1); | |
2052 __ vuzp(Neon32, dst, kScratchQuadReg); | |
2053 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2054 break; | |
2055 } | |
2056 case kArmS32x4UnzipRight: { | |
martyn.capewell
2017/04/10 16:46:55
vshrn.i64 dst.low, src, #32
vmov dst.high, dst.low
| |
2057 Simd128Register dst = i.OutputSimd128Register(), | |
2058 src1 = i.InputSimd128Register(1); | |
2059 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2060 __ vmov(kScratchQuadReg, src1); | |
2061 __ vuzp(Neon32, kScratchQuadReg, dst); | |
2062 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2063 break; | |
2064 } | |
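A sketch of the narrowing trick suggested in the review comments above, assuming standard NEON semantics (illustration only; helper names are hypothetical): `vmovn.i64` keeps the low 32 bits of each 64-bit lane, i.e. the even 32-bit lanes, while `vshrn` by #32 keeps the high 32 bits, i.e. the odd lanes, so a narrow plus a `vmov` implements a single-register unzip.

```cpp
#include <array>
#include <cstdint>

// vmovn.i64: truncating narrow keeps the low 32 bits of each lane
// (the even 32-bit lanes of the quad register).
std::array<uint32_t, 2> Vmovn64(const std::array<uint64_t, 2>& q) {
  return {static_cast<uint32_t>(q[0]), static_cast<uint32_t>(q[1])};
}

// vshrn.i64 #32: shift-right-narrow keeps the high 32 bits instead
// (the odd 32-bit lanes).
std::array<uint32_t, 2> Vshrn64_32(const std::array<uint64_t, 2>& q) {
  return {static_cast<uint32_t>(q[0] >> 32),
          static_cast<uint32_t>(q[1] >> 32)};
}
```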
2065 case kArmS32x4TransposeLeft: { | |
martyn.capewell
2017/04/10 16:46:55
vdup.32 dst.low, src.low[0]
vdup.32 dst.high, src.high[0]
| |
2066 Simd128Register dst = i.OutputSimd128Register(), | |
2067 src1 = i.InputSimd128Register(1); | |
2068 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2069 __ vmov(kScratchQuadReg, src1); | |
2070 __ vtrn(Neon32, dst, kScratchQuadReg); | |
2071 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2072 break; | |
2073 } | |
2074 case kArmS32x4TransposeRight: { | |
martyn.capewell
2017/04/10 16:46:56
vdup dst.low, src.low[1]
vdup dst.high, src.high[1]
| |
2075 Simd128Register dst = i.OutputSimd128Register(), | |
2076 src1 = i.InputSimd128Register(1); | |
2077 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2078 __ vmov(kScratchQuadReg, src1); | |
2079 __ vtrn(Neon32, kScratchQuadReg, dst); | |
2080 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2081 break; | |
2082 } | |
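For reference, a minimal scalar sketch of the six lane patterns the S32x4 cases above implement, assuming the standard NEON vzip/vuzp/vtrn definitions and that operand order for the Right variants is arranged by the instruction selector (illustration only; the names are hypothetical). The S16x8 and S8x16 cases below follow the same patterns at 16-bit and 8-bit lane width.

```cpp
#include <array>
#include <cstdint>

using S32x4 = std::array<uint32_t, 4>;

// Standard zip/unzip/transpose lane patterns for two 4-lane vectors.
S32x4 ZipLeft(S32x4 a, S32x4 b)        { return {a[0], b[0], a[1], b[1]}; }
S32x4 ZipRight(S32x4 a, S32x4 b)       { return {a[2], b[2], a[3], b[3]}; }
S32x4 UnzipLeft(S32x4 a, S32x4 b)      { return {a[0], a[2], b[0], b[2]}; }
S32x4 UnzipRight(S32x4 a, S32x4 b)     { return {a[1], a[3], b[1], b[3]}; }
S32x4 TransposeLeft(S32x4 a, S32x4 b)  { return {a[0], b[0], a[2], b[2]}; }
S32x4 TransposeRight(S32x4 a, S32x4 b) { return {a[1], b[1], a[3], b[3]}; }
```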
2083 case kArmS16x8ZipLeft: { | |
2084 Simd128Register dst = i.OutputSimd128Register(), | |
2085 src1 = i.InputSimd128Register(1); | |
2086 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2087 __ vmov(dst.high(), src1.low()); | |
2088 __ vzip(Neon16, dst.low(), dst.high()); | |
2089 break; | |
2090 } | |
2091 case kArmS16x8ZipRight: { | |
2092 Simd128Register dst = i.OutputSimd128Register(), | |
2093 src1 = i.InputSimd128Register(1); | |
2094 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2095 __ vmov(dst.low(), src1.high()); | |
2096 __ vzip(Neon16, dst.low(), dst.high()); | |
2097 break; | |
2098 } | |
2099 case kArmS16x8UnzipLeft: { | |
martyn.capewell
2017/04/10 16:46:55
vmovn.i32 dst.low, src
vmov dst.high, dst.low
| |
2100 Simd128Register dst = i.OutputSimd128Register(), | |
2101 src1 = i.InputSimd128Register(1); | |
2102 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2103 __ vmov(kScratchQuadReg, src1); | |
2104 __ vuzp(Neon16, dst, kScratchQuadReg); | |
2105 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2106 break; | |
2107 } | |
2108 case kArmS16x8UnzipRight: { | |
martyn.capewell
2017/04/10 16:46:55
vshrn.i32 dst.low, src, #16
vmov dst.high, dst.low
| |
2109 Simd128Register dst = i.OutputSimd128Register(), | |
2110 src1 = i.InputSimd128Register(1); | |
2111 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2112 __ vmov(kScratchQuadReg, src1); | |
2113 __ vuzp(Neon16, kScratchQuadReg, dst); | |
2114 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2115 break; | |
2116 } | |
2117 case kArmS16x8TransposeLeft: { | |
2118 Simd128Register dst = i.OutputSimd128Register(), | |
2119 src1 = i.InputSimd128Register(1); | |
2120 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2121 __ vmov(kScratchQuadReg, src1); | |
2122 __ vtrn(Neon16, dst, kScratchQuadReg); | |
2123 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2124 break; | |
2125 } | |
2126 case kArmS16x8TransposeRight: { | |
2127 Simd128Register dst = i.OutputSimd128Register(), | |
2128 src1 = i.InputSimd128Register(1); | |
2129 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2130 __ vmov(kScratchQuadReg, src1); | |
2131 __ vtrn(Neon16, kScratchQuadReg, dst); | |
2132 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2133 break; | |
2134 } | |
2135 case kArmS8x16ZipLeft: { | |
2136 Simd128Register dst = i.OutputSimd128Register(), | |
2137 src1 = i.InputSimd128Register(1); | |
2138 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2139 __ vmov(dst.high(), src1.low()); | |
2140 __ vzip(Neon8, dst.low(), dst.high()); | |
2141 break; | |
2142 } | |
2143 case kArmS8x16ZipRight: { | |
2144 Simd128Register dst = i.OutputSimd128Register(), | |
2145 src1 = i.InputSimd128Register(1); | |
2146 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2147 __ vmov(dst.low(), src1.high()); | |
2148 __ vzip(Neon8, dst.low(), dst.high()); | |
2149 break; | |
2150 } | |
2151 case kArmS8x16UnzipLeft: { | |
martyn.capewell
2017/04/10 16:46:55
vmovn.i16 dst.low, src
vmov dst.high, dst.low
| |
2152 Simd128Register dst = i.OutputSimd128Register(), | |
2153 src1 = i.InputSimd128Register(1); | |
2154 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2155 __ vmov(kScratchQuadReg, src1); | |
2156 __ vuzp(Neon8, dst, kScratchQuadReg); | |
2157 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2158 break; | |
2159 } | |
2160 case kArmS8x16UnzipRight: { | |
martyn.capewell
2017/04/10 16:46:55
vshrn.i16 dst.low, src, #8
vmov dst.high, dst.low
| |
2161 Simd128Register dst = i.OutputSimd128Register(), | |
2162 src1 = i.InputSimd128Register(1); | |
2163 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2164 __ vmov(kScratchQuadReg, src1); | |
2165 __ vuzp(Neon8, kScratchQuadReg, dst); | |
2166 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2167 break; | |
2168 } | |
2169 case kArmS8x16TransposeLeft: { | |
2170 Simd128Register dst = i.OutputSimd128Register(), | |
2171 src1 = i.InputSimd128Register(1); | |
2172 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2173 __ vmov(kScratchQuadReg, src1); | |
2174 __ vtrn(Neon8, dst, kScratchQuadReg); | |
2175 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2176 break; | |
2177 } | |
2178 case kArmS8x16TransposeRight: { | |
2179 Simd128Register dst = i.OutputSimd128Register(), | |
2180 src1 = i.InputSimd128Register(1); | |
2181 DCHECK(dst.is(i.InputSimd128Register(0))); | |
2182 __ vmov(kScratchQuadReg, src1); | |
2183 __ vtrn(Neon8, kScratchQuadReg, dst); | |
2184 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
2185 break; | |
2186 } | |
2187 case kArmS8x16Concat: { | |
2188 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), | |
2189 i.InputSimd128Register(1), i.InputInt4(2)); | |
2190 break; | |
2191 } | |
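A scalar sketch of the `vext` semantics that kArmS8x16Concat relies on, assuming the standard NEON definition (illustration only; the helper name is hypothetical): the result is 16 consecutive bytes of the 32-byte concatenation of the two sources, starting at the immediate byte offset.

```cpp
#include <array>
#include <cstdint>

using S8x16 = std::array<uint8_t, 16>;

// vext-style extract: bytes [imm .. imm+15] of the concatenation
// {a, b}, where a supplies the low 16 bytes.
S8x16 ConcatExtract(const S8x16& a, const S8x16& b, int imm) {
  S8x16 r{};
  for (int i = 0; i < 16; ++i) {
    int j = i + imm;  // imm is a 4-bit immediate, 0..15
    r[i] = j < 16 ? a[j] : b[j - 16];
  }
  return r;
}
```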
2192 case kArmS64x2Reverse: { | |
martyn.capewell
2017/04/10 16:46:55
vext(dst, src, src, 8);
bbudge
2017/04/10 23:26:47
Nice. However, since we expose VEXT (as S8x16Concat)
| |
2193 Simd128Register dst = i.OutputSimd128Register(), | |
2194 src = i.InputSimd128Register(0); | |
2195 if (!dst.is(src)) __ vmov(dst, src); | |
2196 __ vswp(dst.low(), dst.high()); | |
2197 break; | |
2198 } | |
2199 case kArmS32x2Reverse: { | |
2200 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
2201 break; | |
2202 } | |
2203 case kArmS16x4Reverse: { | |
2204 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
2205 break; | |
2206 } | |
2207 case kArmS16x2Reverse: { | |
2208 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
2209 break; | |
2210 } | |
2211 case kArmS8x8Reverse: { | |
2212 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
2213 break; | |
2214 } | |
2215 case kArmS8x4Reverse: { | |
2216 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
2217 break; | |
2218 } | |
2219 case kArmS8x2Reverse: { | |
2220 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
2221 break; | |
2222 } | |
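The vrev64/vrev32/vrev16 cases above all follow one pattern: reverse the order of the lanes inside each 64-, 32-, or 16-bit group. A scalar sketch under that assumption (illustration only; the name is hypothetical):

```cpp
#include <algorithm>
#include <array>
#include <cstdint>

// vrev-style reversal: reverse T-sized lanes within each group of
// kGroupBytes. E.g. S32x2Reverse (vrev64.32) is Rev<uint32_t, 8>(v),
// and S8x2Reverse (vrev16.8) is Rev<uint8_t, 2>(v).
template <typename T, size_t kGroupBytes>
std::array<T, 16 / sizeof(T)> Rev(std::array<T, 16 / sizeof(T)> v) {
  const size_t lanes_per_group = kGroupBytes / sizeof(T);
  for (size_t base = 0; base < v.size(); base += lanes_per_group)
    std::reverse(v.begin() + base, v.begin() + base + lanes_per_group);
  return v;
}
```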
2031 case kArmS1x4AnyTrue: { | 2223 case kArmS1x4AnyTrue: { |
2032 const QwNeonRegister& src = i.InputSimd128Register(0); | 2224 const QwNeonRegister& src = i.InputSimd128Register(0); |
2033 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); | 2225 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); |
2034 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, | 2226 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, |
2035 kScratchDoubleReg); | 2227 kScratchDoubleReg); |
2036 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); | 2228 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); |
2037 break; | 2229 break; |
2038 } | 2230 } |
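A scalar model of the pairwise-max reduction above (illustration only; the helper name is hypothetical): two `vpmax.u32` steps fold four lanes down to one, and the extracted lane is non-zero exactly when some input lane is non-zero.

```cpp
#include <algorithm>
#include <array>
#include <cstdint>

// S1x4AnyTrue via pairwise max: fold {v0,v1,v2,v3} to a single lane.
uint32_t S1x4AnyTrueModel(const std::array<uint32_t, 4>& v) {
  uint32_t m01 = std::max(v[0], v[1]);  // first vpmax, low pair
  uint32_t m23 = std::max(v[2], v[3]);  // first vpmax, high pair
  return std::max(m01, m23);            // second vpmax; lane 0 extracted
}
```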
2039 case kArmS1x4AllTrue: { | 2231 case kArmS1x4AllTrue: { |
2040 const QwNeonRegister& src = i.InputSimd128Register(0); | 2232 const QwNeonRegister& src = i.InputSimd128Register(0); |
(...skipping 790 matching lines...) | |
2831 padding_size -= v8::internal::Assembler::kInstrSize; | 3023 padding_size -= v8::internal::Assembler::kInstrSize; |
2832 } | 3024 } |
2833 } | 3025 } |
2834 } | 3026 } |
2835 | 3027 |
2836 #undef __ | 3028 #undef __ |
2837 | 3029 |
2838 } // namespace compiler | 3030 } // namespace compiler |
2839 } // namespace internal | 3031 } // namespace internal |
2840 } // namespace v8 | 3032 } // namespace v8 |