Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(170)

Side by Side Diff: source/row_win.cc

Issue 1377053003: remove sse2 functions that also have ssse3 (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: lint warning fixes Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« include/libyuv/row.h ('K') | « source/row_gcc.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 3442 matching lines...) Expand 10 before | Expand all | Expand 10 after
3453 vmovdqu [edx], ymm0 3453 vmovdqu [edx], ymm0
3454 lea edx, [edx + 32] 3454 lea edx, [edx + 32]
3455 sub ecx, 32 3455 sub ecx, 32
3456 jg convertloop 3456 jg convertloop
3457 vzeroupper 3457 vzeroupper
3458 ret 3458 ret
3459 } 3459 }
3460 } 3460 }
3461 #endif // HAS_MIRRORROW_AVX2 3461 #endif // HAS_MIRRORROW_AVX2
3462 3462
3463 #ifdef HAS_MIRRORROW_SSE2
3464 __declspec(naked) // Writes the bytes of src to dst in reversed order, 16 bytes per loop pass.
3465 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
3466 __asm {
3467 mov eax, [esp + 4] // src (read pointer base)
3468 mov edx, [esp + 8] // dst (write pointer, advances forward)
3469 mov ecx, [esp + 12] // width (bytes still to mirror)
3470
3471 convertloop: // NOTE(review): loop steps in 16s - assumes width is a positive multiple of 16; confirm callers
3472 movdqu xmm0, [eax - 16 + ecx] // load the last 16 source bytes not yet mirrored
3473 movdqa xmm1, xmm0 // swap bytes: keep a copy so both shifts can be merged
3474 psllw xmm0, 8 // low byte of each word moves to the high byte
3475 psrlw xmm1, 8 // high byte of each word moves to the low byte
3476 por xmm0, xmm1 // every 16-bit word now has its two bytes exchanged
3477 pshuflw xmm0, xmm0, 0x1b // swap words: reverse the 4 words of the low qword
3478 pshufhw xmm0, xmm0, 0x1b // reverse the 4 words of the high qword
3479 pshufd xmm0, xmm0, 0x4e // swap qwords: the 16 bytes are now fully reversed
3480 movdqu [edx], xmm0 // store 16 mirrored bytes
3481 lea edx, [edx + 16] // advance dst without touching flags
3482 sub ecx, 16 // 16 bytes consumed from the end of src
3483 jg convertloop // repeat while bytes remain
3484 ret
3485 }
3486 }
3487 #endif // HAS_MIRRORROW_SSE2
3488
3489 #ifdef HAS_MIRRORROW_UV_SSSE3 3463 #ifdef HAS_MIRRORROW_UV_SSSE3
3490 // Shuffle table for reversing the bytes of UV channels. 3464 // Shuffle table for reversing the bytes of UV channels.
3491 static const uvec8 kShuffleMirrorUV = { 3465 static const uvec8 kShuffleMirrorUV = {
3492 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u 3466 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
3493 }; 3467 };
3494 3468
3495 __declspec(naked) 3469 __declspec(naked)
3496 void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, 3470 void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
3497 int width) { 3471 int width) {
3498 __asm { 3472 __asm {
(...skipping 876 matching lines...) Expand 10 before | Expand all | Expand 10 after
4375 lea edx, [edx + 8] 4349 lea edx, [edx + 8]
4376 sub ecx, 16 4350 sub ecx, 16
4377 jg convertloop 4351 jg convertloop
4378 4352
4379 pop edi 4353 pop edi
4380 ret 4354 ret
4381 } 4355 }
4382 } 4356 }
4383 #endif // HAS_YUY2TOYROW_SSE2 4357 #endif // HAS_YUY2TOYROW_SSE2
4384 4358
4385 #ifdef HAS_ARGBBLENDROW_SSE2
4386 // Blend src_argb0 over src_argb1: 4 pixels per pass in the main loop, then 1 pixel per pass for the remainder.
4387 __declspec(naked)
4388 void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
4389 uint8* dst_argb, int width) {
4390 __asm {
4391 push esi // esi holds src_argb1 below; restored before ret
4392 mov eax, [esp + 4 + 4] // src_argb0 (the extra 4 skips the pushed esi)
4393 mov esi, [esp + 4 + 8] // src_argb1
4394 mov edx, [esp + 4 + 12] // dst_argb
4395 mov ecx, [esp + 4 + 16] // width in pixels
4396 pcmpeqb xmm7, xmm7 // generate constant 1: start from all ones
4397 psrlw xmm7, 15 // each word = 0x0001
4398 pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff (even bytes)
4399 psrlw xmm6, 8
4400 pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 (odd bytes)
4401 psllw xmm5, 8
4402 pcmpeqb xmm4, xmm4 // generate mask 0xff000000 (top byte of each pixel)
4403 pslld xmm4, 24
4404 sub ecx, 4 // bias width by -4 so the sign shows whether 4 pixels remain
4405 jl convertloop4b // less than 4 pixels? skip the 4 pixel loop
4406
4407 // 4 pixel loop.
4408 convertloop4:
4409 movdqu xmm3, [eax] // 4 pixels of src_argb0
4410 lea eax, [eax + 16]
4411 movdqa xmm0, xmm3 // second copy of src_argb0, becomes the accumulator
4412 pxor xmm3, xmm4 // invert the top byte of each pixel: 255 - alpha
4413 movdqu xmm2, [esi] // 4 pixels of src_argb1, for the _r_b half
4414 psrlw xmm3, 8 // odd bytes move down; odd words now hold 255 - alpha
4415 pshufhw xmm3, xmm3, 0F5h // copy each odd word over its even neighbour: 8 words of 255 - alpha
4416 pshuflw xmm3, xmm3, 0F5h
4417 pand xmm2, xmm6 // _r_b: even bytes of src_argb1 as words
4418 paddw xmm3, xmm7 // 256 - alpha
4419 pmullw xmm2, xmm3 // _r_b * (256 - alpha)
4420 movdqu xmm1, [esi] // 4 pixels of src_argb1 again, for the _a_g half
4421 lea esi, [esi + 16]
4422 psrlw xmm1, 8 // _a_g: odd bytes of src_argb1 as words
4423 por xmm0, xmm4 // set alpha of the accumulator to 255
4424 pmullw xmm1, xmm3 // _a_g * (256 - alpha)
4425 psrlw xmm2, 8 // _r_b convert to 8 bits again (divide by 256)
4426 paddusb xmm0, xmm2 // src_argb0 + scaled _r_b, saturating per byte
4427 pand xmm1, xmm5 // a_g_ convert to 8 bits again: keep the high byte in place
4428 paddusb xmm0, xmm1 // src_argb0 + scaled a_g_, saturating per byte
4429 movdqu [edx], xmm0 // store 4 blended pixels
4430 lea edx, [edx + 16]
4431 sub ecx, 4
4432 jge convertloop4 // loop while at least 4 pixels remain
4433
4434 convertloop4b:
4435 add ecx, 4 - 1 // undo the -4 bias and apply a -1 bias for the 1 pixel loop
4436 jl convertloop1b // no pixels left
4437
4438 // 1 pixel loop.
4439 convertloop1:
4440 movd xmm3, [eax] // 1 pixel of src_argb0
4441 lea eax, [eax + 4]
4442 movdqa xmm0, xmm3 // second copy of src_argb0, becomes the accumulator
4443 pxor xmm3, xmm4 // invert the top byte of the pixel: 255 - alpha
4444 movd xmm2, [esi] // 1 pixel of src_argb1, for the _r_b half
4445 psrlw xmm3, 8 // odd bytes move down; odd words now hold 255 - alpha
4446 pshufhw xmm3, xmm3, 0F5h // copy each odd word over its even neighbour
4447 pshuflw xmm3, xmm3, 0F5h
4448 pand xmm2, xmm6 // _r_b: even bytes of src_argb1 as words
4449 paddw xmm3, xmm7 // 256 - alpha
4450 pmullw xmm2, xmm3 // _r_b * (256 - alpha)
4451 movd xmm1, [esi] // 1 pixel of src_argb1 again, for the _a_g half
4452 lea esi, [esi + 4]
4453 psrlw xmm1, 8 // _a_g: odd bytes of src_argb1 as words
4454 por xmm0, xmm4 // set alpha of the accumulator to 255
4455 pmullw xmm1, xmm3 // _a_g * (256 - alpha)
4456 psrlw xmm2, 8 // _r_b convert to 8 bits again (divide by 256)
4457 paddusb xmm0, xmm2 // src_argb0 + scaled _r_b, saturating per byte
4458 pand xmm1, xmm5 // a_g_ convert to 8 bits again: keep the high byte in place
4459 paddusb xmm0, xmm1 // src_argb0 + scaled a_g_, saturating per byte
4460 movd [edx], xmm0 // store 1 blended pixel
4461 lea edx, [edx + 4]
4462 sub ecx, 1
4463 jge convertloop1 // loop while pixels remain
4464
4465 convertloop1b:
4466 pop esi // restore the register saved on entry
4467 ret
4468 }
4469 }
4470 #endif // HAS_ARGBBLENDROW_SSE2
4471
4472 #ifdef HAS_ARGBBLENDROW_SSSE3 4359 #ifdef HAS_ARGBBLENDROW_SSSE3
4473 // Shuffle table for isolating alpha. 4360 // Shuffle table for isolating alpha.
4474 static const uvec8 kShuffleAlpha = { 4361 static const uvec8 kShuffleAlpha = {
4475 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 4362 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
4476 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 4363 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
4477 }; 4364 };
4478 // Same as SSE2, but replaces: 4365
4479 // psrlw xmm3, 8 // alpha
4480 // pshufhw xmm3, xmm3, 0F5h // 8 alpha words
4481 // pshuflw xmm3, xmm3, 0F5h
4482 // with..
4483 // pshufb xmm3, kShuffleAlpha // alpha
4484 // Blend 8 pixels at a time. 4366 // Blend 8 pixels at a time.
4485
4486 __declspec(naked) 4367 __declspec(naked)
4487 void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, 4368 void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
4488 uint8* dst_argb, int width) { 4369 uint8* dst_argb, int width) {
4489 __asm { 4370 __asm {
4490 push esi 4371 push esi
4491 mov eax, [esp + 4 + 4] // src_argb0 4372 mov eax, [esp + 4 + 4] // src_argb0
4492 mov esi, [esp + 4 + 8] // src_argb1 4373 mov esi, [esp + 4 + 8] // src_argb1
4493 mov edx, [esp + 4 + 12] // dst_argb 4374 mov edx, [esp + 4 + 12] // dst_argb
4494 mov ecx, [esp + 4 + 16] // width 4375 mov ecx, [esp + 4 + 16] // width
4495 pcmpeqb xmm7, xmm7 // generate constant 0x0001 4376 pcmpeqb xmm7, xmm7 // generate constant 0x0001
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
4557 sub ecx, 1 4438 sub ecx, 1
4558 jge convertloop1 4439 jge convertloop1
4559 4440
4560 convertloop1b: 4441 convertloop1b:
4561 pop esi 4442 pop esi
4562 ret 4443 ret
4563 } 4444 }
4564 } 4445 }
4565 #endif // HAS_ARGBBLENDROW_SSSE3 4446 #endif // HAS_ARGBBLENDROW_SSSE3
4566 4447
4567 #ifdef HAS_ARGBATTENUATEROW_SSE2
4568 // Attenuate 4 pixels at a time: scale the low 3 bytes of each pixel by its top (alpha) byte, alpha itself is copied through.
4569 __declspec(naked)
4570 void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
4571 __asm {
4572 mov eax, [esp + 4] // src_argb
4573 mov edx, [esp + 8] // dst_argb
4574 mov ecx, [esp + 12] // width in pixels
4575 pcmpeqb xmm4, xmm4 // generate mask 0xff000000 (alpha byte of each pixel)
4576 pslld xmm4, 24
4577 pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff (the other 3 bytes of each pixel)
4578 psrld xmm5, 8
4579
4580 convertloop: // NOTE(review): loop steps in 4s - assumes width is a positive multiple of 4; confirm callers
4581 movdqu xmm0, [eax] // read 4 pixels
4582 punpcklbw xmm0, xmm0 // first 2 pixels, each byte duplicated into a 16-bit word
4583 pshufhw xmm2, xmm0, 0FFh // 8 alpha words: broadcast word 7 across the high qword
4584 pshuflw xmm2, xmm2, 0FFh // broadcast word 3 across the low qword
4585 pmulhuw xmm0, xmm2 // rgb * a, keeping the high 16 bits of each product
4586 movdqu xmm1, [eax] // read the same 4 pixels again
4587 punpckhbw xmm1, xmm1 // next 2 pixels, each byte duplicated into a 16-bit word
4588 pshufhw xmm2, xmm1, 0FFh // 8 alpha words for these 2 pixels
4589 pshuflw xmm2, xmm2, 0FFh
4590 pmulhuw xmm1, xmm2 // rgb * a, keeping the high 16 bits of each product
4591 movdqu xmm2, [eax] // third read of the 4 pixels, used for the original alphas
4592 lea eax, [eax + 16]
4593 psrlw xmm0, 8 // reduce first 2 pixels back to 8 bit values
4594 pand xmm2, xmm4 // isolate the original alpha bytes
4595 psrlw xmm1, 8 // reduce next 2 pixels back to 8 bit values
4596 packuswb xmm0, xmm1 // pack the 16 words into 16 bytes (4 pixels)
4597 pand xmm0, xmm5 // clear the computed alpha byte
4598 por xmm0, xmm2 // insert the original alphas
4599 movdqu [edx], xmm0 // store 4 attenuated pixels
4600 lea edx, [edx + 16]
4601 sub ecx, 4
4602 jg convertloop // loop while pixels remain
4603
4604 ret
4605 }
4606 }
4607 #endif // HAS_ARGBATTENUATEROW_SSE2
4608
4609 #ifdef HAS_ARGBATTENUATEROW_SSSE3 4448 #ifdef HAS_ARGBATTENUATEROW_SSSE3
4610 // Shuffle table duplicating alpha. 4449 // Shuffle table duplicating alpha.
4611 static const uvec8 kShuffleAlpha0 = { 4450 static const uvec8 kShuffleAlpha0 = {
4612 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, 4451 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
4613 }; 4452 };
4614 static const uvec8 kShuffleAlpha1 = { 4453 static const uvec8 kShuffleAlpha1 = {
4615 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, 4454 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
4616 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, 4455 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
4617 }; 4456 };
4618 __declspec(naked) 4457 __declspec(naked)
(...skipping 2085 matching lines...) Expand 10 before | Expand all | Expand 10 after
6704 } 6543 }
6705 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6544 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6706 6545
6707 #endif // defined(_M_X64) 6546 #endif // defined(_M_X64)
6708 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6547 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6709 6548
6710 #ifdef __cplusplus 6549 #ifdef __cplusplus
6711 } // extern "C" 6550 } // extern "C"
6712 } // namespace libyuv 6551 } // namespace libyuv
6713 #endif 6552 #endif
OLDNEW
« include/libyuv/row.h ('K') | « source/row_gcc.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698