Chromium Code Reviews

Issue 2951793003: [wasm] Implement remaining SIMD x64 compare ops, unops. (Closed)

  | OLD | NEW | 
|---|---|
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" | 
| 6 | 6 | 
| 7 #include <limits> | 7 #include <limits> | 
| 8 | 8 | 
| 9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" | 
| 10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" | 
| (...skipping 2168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2179 case kX64I32x4ReplaceLane: { | 2179 case kX64I32x4ReplaceLane: { | 
| 2180 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2180 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2181 if (instr->InputAt(2)->IsRegister()) { | 2181 if (instr->InputAt(2)->IsRegister()) { | 
| 2182 __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2), | 2182 __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2), | 
| 2183 i.InputInt8(1)); | 2183 i.InputInt8(1)); | 
| 2184 } else { | 2184 } else { | 
| 2185 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); | 2185 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); | 
| 2186 } | 2186 } | 
| 2187 break; | 2187 break; | 
| 2188 } | 2188 } | 
| 2189 case kX64I32x4Neg: { | |
| 2190 XMMRegister dst = i.OutputSimd128Register(); | |
| 2191 XMMRegister src = i.InputSimd128Register(0); | |
| 2192 if (dst.is(src)) { | |
| 2193 __ movaps(kScratchDoubleReg, dst); | |
| 2194 __ pxor(dst, dst); | |
| 2195 __ psubd(dst, kScratchDoubleReg); | |
| 2196 } else { | |
| 2197 __ pxor(dst, dst); | |
| 2198 __ psubd(dst, src); | |
| 2199 } | |
| 2200 break; | |
| 2201 } | |
| 2189 case kX64I32x4Shl: { | 2202 case kX64I32x4Shl: { | 
| 2190 __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); | 2203 __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); | 
| 2191 break; | 2204 break; | 
| 2192 } | 2205 } | 
| 2193 case kX64I32x4ShrS: { | 2206 case kX64I32x4ShrS: { | 
| 2194 __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); | 2207 __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); | 
| 2195 break; | 2208 break; | 
| 2196 } | 2209 } | 
| 2197 case kX64I32x4Add: { | 2210 case kX64I32x4Add: { | 
| 2198 __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2211 __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| (...skipping 26 matching lines...) Expand all Loading... | |
| 2225 case kX64I32x4Eq: { | 2238 case kX64I32x4Eq: { | 
| 2226 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2239 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2227 break; | 2240 break; | 
| 2228 } | 2241 } | 
| 2229 case kX64I32x4Ne: { | 2242 case kX64I32x4Ne: { | 
| 2230 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2243 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2231 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); | 2244 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); | 
| 2232 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); | 2245 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); | 
| 2233 break; | 2246 break; | 
| 2234 } | 2247 } | 
| 2248 case kX64I32x4GtS: { | |
| 2249 __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1)); | |
| 2250 break; | |
| 2251 } | |
| 2252 case kX64I32x4GeS: { | |
| 2253 XMMRegister dst = i.OutputSimd128Register(); | |
| 2254 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); | |
| 
bbudge (2017/06/22 17:30:10): Can you use pminsd here to save 1 instruction?

gdeepti (2017/06/27 20:47:42): Done.
 | |
| 2255 __ pcmpgtd(kScratchDoubleReg, dst); | |
| 2256 __ pcmpeqd(dst, dst); | |
| 2257 __ pxor(dst, kScratchDoubleReg); | |
| 2258 break; | |
| 2259 } | |
| 2235 case kX64I32x4ShrU: { | 2260 case kX64I32x4ShrU: { | 
| 2236 __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); | 2261 __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); | 
| 2237 break; | 2262 break; | 
| 2238 } | 2263 } | 
| 2239 case kX64I32x4MinU: { | 2264 case kX64I32x4MinU: { | 
| 2240 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2265 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2241 __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2266 __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2242 break; | 2267 break; | 
| 2243 } | 2268 } | 
| 2244 case kX64I32x4MaxU: { | 2269 case kX64I32x4MaxU: { | 
| 2245 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2270 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2246 __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2271 __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2247 break; | 2272 break; | 
| 2248 } | 2273 } | 
| 2274 case kX64I32x4GtU: { | |
| 2275 XMMRegister dst = i.OutputSimd128Register(); | |
| 2276 XMMRegister src = i.InputSimd128Register(1); | |
| 2277 __ Set(kScratchRegister, 0x80000000); | |
| 2278 __ movd(kScratchDoubleReg, kScratchRegister); | |
| 2279 __ pshufd(kScratchDoubleReg, kScratchDoubleReg, 0x0); | |
| 2280 __ pxor(src, kScratchDoubleReg); | |
| 2281 __ pxor(dst, kScratchDoubleReg); | |
| 2282 __ pcmpgtd(dst, src); | |
| 2283 // Reset input to not clobber. | |
| 2284 __ pxor(src, kScratchDoubleReg); | |
| 
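gdeepti (2017/06/21 03:44:01): This is similar to the code Clang generates for un[…] (comment truncated in the review page)

bbudge (2017/06/22 16:29:49): I think we'll have to support S128 constants at so[…] (comment truncated in the review page)

gdeepti (2017/06/22 17:26:48): I like the simplicity of the pminud/pmaxud approac[…] (comment truncated in the review page)

zvi (2017/06/24 23:04:09): I agree that the pmax/pmin option is preferable fo[…] (comment truncated in the review page)

gdeepti (2017/06/27 20:47:42): Bill: This returns true for the == case as well,[…] (comment truncated in the review page)

zvi (2017/06/27 22:30:32): I didn't mean to say you are selecting MMX instruc[…] (comment truncated in the review page)

gdeepti (2017/06/27 23:23:37): Sorry for the confusion, I remembered stale state[…] (comment truncated in the review page)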
 | |
| 2285 break; | |
| 2286 } | |
| 2287 case kX64I32x4GeU: { | |
| 2288 CpuFeatureScope sse_scope(masm(), SSE4_1); | |
| 2289 XMMRegister dst = i.OutputSimd128Register(); | |
| 2290 XMMRegister src = i.InputSimd128Register(1); | |
| 2291 __ pminud(dst, src); | |
| 2292 __ pcmpeqd(dst, src); | |
| 2293 break; | |
| 2294 } | |
| 2249 case kX64S128Zero: { | 2295 case kX64S128Zero: { | 
| 2250 XMMRegister dst = i.OutputSimd128Register(); | 2296 XMMRegister dst = i.OutputSimd128Register(); | 
| 2251 __ xorps(dst, dst); | 2297 __ xorps(dst, dst); | 
| 2252 break; | 2298 break; | 
| 2253 } | 2299 } | 
| 2254 case kX64I16x8Splat: { | 2300 case kX64I16x8Splat: { | 
| 2255 XMMRegister dst = i.OutputSimd128Register(); | 2301 XMMRegister dst = i.OutputSimd128Register(); | 
| 2256 __ movd(dst, i.InputRegister(0)); | 2302 __ movd(dst, i.InputRegister(0)); | 
| 2257 __ pshuflw(dst, dst, 0x0); | 2303 __ pshuflw(dst, dst, 0x0); | 
| 2258 __ pshufhw(dst, dst, 0x0); | 2304 __ pshufhw(dst, dst, 0x0); | 
| (...skipping 10 matching lines...) Expand all Loading... | |
| 2269 case kX64I16x8ReplaceLane: { | 2315 case kX64I16x8ReplaceLane: { | 
| 2270 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2316 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2271 if (instr->InputAt(2)->IsRegister()) { | 2317 if (instr->InputAt(2)->IsRegister()) { | 
| 2272 __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2), | 2318 __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2), | 
| 2273 i.InputInt8(1)); | 2319 i.InputInt8(1)); | 
| 2274 } else { | 2320 } else { | 
| 2275 __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); | 2321 __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); | 
| 2276 } | 2322 } | 
| 2277 break; | 2323 break; | 
| 2278 } | 2324 } | 
| 2325 case kX64I16x8Neg: { | |
| 2326 XMMRegister dst = i.OutputSimd128Register(); | |
| 2327 XMMRegister src = i.InputSimd128Register(0); | |
| 2328 if (dst.is(src)) { | |
| 2329 __ movaps(kScratchDoubleReg, dst); | |
| 2330 __ pxor(dst, dst); | |
| 2331 __ psubw(dst, kScratchDoubleReg); | |
| 2332 } else { | |
| 2333 __ pxor(dst, dst); | |
| 2334 __ psubw(dst, src); | |
| 2335 } | |
| 2336 break; | |
| 2337 } | |
| 2279 case kX64I16x8Shl: { | 2338 case kX64I16x8Shl: { | 
| 2280 __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); | 2339 __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); | 
| 2281 break; | 2340 break; | 
| 2282 } | 2341 } | 
| 2283 case kX64I16x8ShrS: { | 2342 case kX64I16x8ShrS: { | 
| 2284 __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); | 2343 __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); | 
| 2285 break; | 2344 break; | 
| 2286 } | 2345 } | 
| 2287 case kX64I16x8Add: { | 2346 case kX64I16x8Add: { | 
| 2288 __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2347 __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2323 case kX64I16x8Eq: { | 2382 case kX64I16x8Eq: { | 
| 2324 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2383 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2325 break; | 2384 break; | 
| 2326 } | 2385 } | 
| 2327 case kX64I16x8Ne: { | 2386 case kX64I16x8Ne: { | 
| 2328 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2387 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2329 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); | 2388 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); | 
| 2330 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); | 2389 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); | 
| 2331 break; | 2390 break; | 
| 2332 } | 2391 } | 
| 2392 case kX64I16x8GtS: { | |
| 2393 __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | |
| 2394 break; | |
| 2395 } | |
| 2396 case kX64I16x8GeS: { | |
| 2397 XMMRegister dst = i.OutputSimd128Register(); | |
| 2398 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); | |
| 2399 __ pcmpgtw(kScratchDoubleReg, dst); | |
| 2400 __ pcmpeqd(dst, dst); | |
| 2401 __ pxor(dst, kScratchDoubleReg); | |
| 2402 break; | |
| 2403 } | |
| 2333 case kX64I16x8ShrU: { | 2404 case kX64I16x8ShrU: { | 
| 2334 __ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); | 2405 __ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); | 
| 2335 break; | 2406 break; | 
| 2336 } | 2407 } | 
| 2337 case kX64I16x8AddSaturateU: { | 2408 case kX64I16x8AddSaturateU: { | 
| 2338 __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2409 __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2339 break; | 2410 break; | 
| 2340 } | 2411 } | 
| 2341 case kX64I16x8SubSaturateU: { | 2412 case kX64I16x8SubSaturateU: { | 
| 2342 __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2413 __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2343 break; | 2414 break; | 
| 2344 } | 2415 } | 
| 2345 case kX64I16x8MinU: { | 2416 case kX64I16x8MinU: { | 
| 2346 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2417 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2347 __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2418 __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2348 break; | 2419 break; | 
| 2349 } | 2420 } | 
| 2350 case kX64I16x8MaxU: { | 2421 case kX64I16x8MaxU: { | 
| 2351 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2422 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2352 __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2423 __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2353 break; | 2424 break; | 
| 2354 } | 2425 } | 
| 2426 case kX64I16x8GtU: { | |
| 2427 XMMRegister dst = i.OutputSimd128Register(); | |
| 2428 XMMRegister src = i.InputSimd128Register(1); | |
| 2429 __ Set(kScratchRegister, 0x80008000); | |
| 2430 __ movd(kScratchDoubleReg, kScratchRegister); | |
| 2431 __ pshufd(kScratchDoubleReg, kScratchDoubleReg, 0x0); | |
| 2432 __ pxor(src, kScratchDoubleReg); | |
| 2433 __ pxor(dst, kScratchDoubleReg); | |
| 2434 __ pcmpgtw(dst, src); | |
| 2435 // Reset input to not clobber. | |
| 2436 __ pxor(src, kScratchDoubleReg); | |
| 2437 break; | |
| 2438 } | |
| 2439 case kX64I16x8GeU: { | |
| 2440 CpuFeatureScope sse_scope(masm(), SSE4_1); | |
| 2441 XMMRegister dst = i.OutputSimd128Register(); | |
| 2442 XMMRegister src = i.InputSimd128Register(1); | |
| 2443 __ pminuw(dst, src); | |
| 2444 __ pcmpeqw(dst, src); | |
| 2445 break; | |
| 2446 } | |
| 2355 case kX64I8x16Splat: { | 2447 case kX64I8x16Splat: { | 
| 2356 CpuFeatureScope sse_scope(masm(), SSSE3); | 2448 CpuFeatureScope sse_scope(masm(), SSSE3); | 
| 2357 XMMRegister dst = i.OutputSimd128Register(); | 2449 XMMRegister dst = i.OutputSimd128Register(); | 
| 2358 __ movd(dst, i.InputRegister(0)); | 2450 __ movd(dst, i.InputRegister(0)); | 
| 2359 __ xorps(kScratchDoubleReg, kScratchDoubleReg); | 2451 __ xorps(kScratchDoubleReg, kScratchDoubleReg); | 
| 2360 __ pshufb(dst, kScratchDoubleReg); | 2452 __ pshufb(dst, kScratchDoubleReg); | 
| 2361 break; | 2453 break; | 
| 2362 } | 2454 } | 
| 2363 case kX64I8x16ExtractLane: { | 2455 case kX64I8x16ExtractLane: { | 
| 2364 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2456 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2365 Register dst = i.OutputRegister(); | 2457 Register dst = i.OutputRegister(); | 
| 2366 __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1)); | 2458 __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1)); | 
| 2367 __ movsxbl(dst, dst); | 2459 __ movsxbl(dst, dst); | 
| 2368 break; | 2460 break; | 
| 2369 } | 2461 } | 
| 2370 case kX64I8x16ReplaceLane: { | 2462 case kX64I8x16ReplaceLane: { | 
| 2371 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2463 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2372 if (instr->InputAt(2)->IsRegister()) { | 2464 if (instr->InputAt(2)->IsRegister()) { | 
| 2373 __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2), | 2465 __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2), | 
| 2374 i.InputInt8(1)); | 2466 i.InputInt8(1)); | 
| 2375 } else { | 2467 } else { | 
| 2376 __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); | 2468 __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); | 
| 2377 } | 2469 } | 
| 2378 break; | 2470 break; | 
| 2379 } | 2471 } | 
| 2472 case kX64I8x16Neg: { | |
| 2473 XMMRegister dst = i.OutputSimd128Register(); | |
| 2474 XMMRegister src = i.InputSimd128Register(0); | |
| 2475 if (dst.is(src)) { | |
| 2476 __ movaps(kScratchDoubleReg, dst); | |
| 2477 __ pxor(dst, dst); | |
| 2478 __ psubb(dst, kScratchDoubleReg); | |
| 2479 } else { | |
| 2480 __ pxor(dst, dst); | |
| 2481 __ psubb(dst, src); | |
| 2482 } | |
| 2483 break; | |
| 2484 } | |
| 2380 case kX64I8x16Add: { | 2485 case kX64I8x16Add: { | 
| 2381 __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2486 __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2382 break; | 2487 break; | 
| 2383 } | 2488 } | 
| 2384 case kX64I8x16AddSaturateS: { | 2489 case kX64I8x16AddSaturateS: { | 
| 2385 __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2490 __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2386 break; | 2491 break; | 
| 2387 } | 2492 } | 
| 2388 case kX64I8x16Sub: { | 2493 case kX64I8x16Sub: { | 
| 2389 __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2494 __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| (...skipping 16 matching lines...) Expand all Loading... | |
| 2406 case kX64I8x16Eq: { | 2511 case kX64I8x16Eq: { | 
| 2407 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2512 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2408 break; | 2513 break; | 
| 2409 } | 2514 } | 
| 2410 case kX64I8x16Ne: { | 2515 case kX64I8x16Ne: { | 
| 2411 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2516 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2412 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); | 2517 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); | 
| 2413 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); | 2518 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); | 
| 2414 break; | 2519 break; | 
| 2415 } | 2520 } | 
| 2521 case kX64I8x16GtS: { | |
| 2522 __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | |
| 2523 break; | |
| 2524 } | |
| 2525 case kX64I8x16GeS: { | |
| 2526 XMMRegister dst = i.OutputSimd128Register(); | |
| 2527 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); | |
| 2528 __ pcmpgtb(kScratchDoubleReg, dst); | |
| 2529 __ pcmpeqd(dst, dst); | |
| 2530 __ pxor(dst, kScratchDoubleReg); | |
| 2531 break; | |
| 2532 } | |
| 2416 case kX64I8x16AddSaturateU: { | 2533 case kX64I8x16AddSaturateU: { | 
| 2417 __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2534 __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2418 break; | 2535 break; | 
| 2419 } | 2536 } | 
| 2420 case kX64I8x16SubSaturateU: { | 2537 case kX64I8x16SubSaturateU: { | 
| 2421 __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2538 __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2422 break; | 2539 break; | 
| 2423 } | 2540 } | 
| 2424 case kX64I8x16MinU: { | 2541 case kX64I8x16MinU: { | 
| 2425 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2542 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2426 __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2543 __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2427 break; | 2544 break; | 
| 2428 } | 2545 } | 
| 2429 case kX64I8x16MaxU: { | 2546 case kX64I8x16MaxU: { | 
| 2430 CpuFeatureScope sse_scope(masm(), SSE4_1); | 2547 CpuFeatureScope sse_scope(masm(), SSE4_1); | 
| 2431 __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2548 __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2432 break; | 2549 break; | 
| 2433 } | 2550 } | 
| 2551 case kX64I8x16GtU: { | |
| 2552 XMMRegister dst = i.OutputSimd128Register(); | |
| 2553 XMMRegister src = i.InputSimd128Register(1); | |
| 2554 __ Set(kScratchRegister, 0x80808080); | |
| 2555 __ movd(kScratchDoubleReg, kScratchRegister); | |
| 2556 __ pshufd(kScratchDoubleReg, kScratchDoubleReg, 0x0); | |
| 2557 __ pxor(src, kScratchDoubleReg); | |
| 2558 __ pxor(dst, kScratchDoubleReg); | |
| 2559 __ pcmpgtb(dst, src); | |
| 2560 // Reset input to not clobber. | |
| 2561 __ pxor(src, kScratchDoubleReg); | |
| 2562 break; | |
| 2563 } | |
| 2564 case kX64I8x16GeU: { | |
| 2565 CpuFeatureScope sse_scope(masm(), SSE4_1); | |
| 2566 XMMRegister dst = i.OutputSimd128Register(); | |
| 2567 XMMRegister src = i.InputSimd128Register(1); | |
| 2568 __ pminub(dst, src); | |
| 2569 __ pcmpeqb(dst, src); | |
| 2570 break; | |
| 2571 } | |
| 2434 case kX64S128And: { | 2572 case kX64S128And: { | 
| 2435 __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2573 __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2436 break; | 2574 break; | 
| 2437 } | 2575 } | 
| 2438 case kX64S128Or: { | 2576 case kX64S128Or: { | 
| 2439 __ por(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2577 __ por(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2440 break; | 2578 break; | 
| 2441 } | 2579 } | 
| 2442 case kX64S128Xor: { | 2580 case kX64S128Xor: { | 
| 2443 __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 2581 __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1)); | 
| 2444 break; | 2582 break; | 
| 2445 } | 2583 } | 
| 2446 case kX64S128Not: { | 2584 case kX64S128Not: { | 
| 2447 XMMRegister dst = i.OutputSimd128Register(); | 2585 XMMRegister dst = i.OutputSimd128Register(); | 
| 2448 __ pcmpeqd(dst, dst); | 2586 XMMRegister src = i.InputSimd128Register(0); | 
| 2449 __ pxor(dst, i.InputSimd128Register(1)); | 2587 if (dst.is(src)) { | 
| 2588 __ movaps(kScratchDoubleReg, dst); | |
| 2589 __ pcmpeqd(dst, dst); | |
| 2590 __ pxor(dst, kScratchDoubleReg); | |
| 2591 } else { | |
| 2592 __ pcmpeqd(dst, dst); | |
| 2593 __ pxor(dst, src); | |
| 2594 } | |
| 2595 | |
| 2450 break; | 2596 break; | 
| 2451 } | 2597 } | 
| 2452 case kX64S128Select: { | 2598 case kX64S128Select: { | 
| 2453 // Mask used here is stored in dst. | 2599 // Mask used here is stored in dst. | 
| 2454 XMMRegister dst = i.OutputSimd128Register(); | 2600 XMMRegister dst = i.OutputSimd128Register(); | 
| 2455 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); | 2601 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); | 
| 2456 __ xorps(kScratchDoubleReg, i.InputSimd128Register(2)); | 2602 __ xorps(kScratchDoubleReg, i.InputSimd128Register(2)); | 
| 2457 __ andps(dst, kScratchDoubleReg); | 2603 __ andps(dst, kScratchDoubleReg); | 
| 2458 __ xorps(dst, i.InputSimd128Register(2)); | 2604 __ xorps(dst, i.InputSimd128Register(2)); | 
| 2459 break; | 2605 break; | 
| (...skipping 752 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3212 int padding_size = last_lazy_deopt_pc_ + space_needed - current_pc; | 3358 int padding_size = last_lazy_deopt_pc_ + space_needed - current_pc; | 
| 3213 __ Nop(padding_size); | 3359 __ Nop(padding_size); | 
| 3214 } | 3360 } | 
| 3215 } | 3361 } | 
| 3216 | 3362 | 
| 3217 #undef __ | 3363 #undef __ | 
| 3218 | 3364 | 
| 3219 } // namespace compiler | 3365 } // namespace compiler | 
| 3220 } // namespace internal | 3366 } // namespace internal | 
| 3221 } // namespace v8 | 3367 } // namespace v8 | 
| OLD | NEW |