| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 3578 matching lines...) |
| 3589 "psrlw $0x8,%%xmm0 \n" | 3589 "psrlw $0x8,%%xmm0 \n" |
| 3590 "packuswb %%xmm0,%%xmm0 \n" | 3590 "packuswb %%xmm0,%%xmm0 \n" |
| 3591 "movq %%xmm0,(%3,%2,1) \n" | 3591 "movq %%xmm0,(%3,%2,1) \n" |
| 3592 "lea 0x8(%2),%2 \n" | 3592 "lea 0x8(%2),%2 \n" |
| 3593 "sub $0x8,%4 \n" | 3593 "sub $0x8,%4 \n" |
| 3594 "jg 1b \n" | 3594 "jg 1b \n" |
| 3595 : "+r"(src0), // %0 | 3595 : "+r"(src0), // %0 |
| 3596 "+r"(src1), // %1 | 3596 "+r"(src1), // %1 |
| 3597 "+r"(alpha), // %2 | 3597 "+r"(alpha), // %2 |
| 3598 "+r"(dst), // %3 | 3598 "+r"(dst), // %3 |
| 3599 "+r"(width) // %4 | 3599 "+rm"(width) // %4 |
| 3600 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" | 3600 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" |
| 3601 ); | 3601 ); |
| 3602 } | 3602 } |
| 3603 #endif // HAS_BLENDPLANEROW_SSSE3 | 3603 #endif // HAS_BLENDPLANEROW_SSSE3 |
| 3604 | 3604 |
| 3605 #ifdef HAS_BLENDPLANEROW_AVX2 | 3605 #ifdef HAS_BLENDPLANEROW_AVX2 |
| 3606 // Blend 32 pixels at a time. | 3606 // Blend 32 pixels at a time. |
| 3607 // unsigned version of math | 3607 // unsigned version of math |
| 3608 // =((A2*C2)+(B2*(255-C2))+255)/256 | 3608 // =((A2*C2)+(B2*(255-C2))+255)/256 |
| 3609 // signed version of math | 3609 // signed version of math |
| (...skipping 36 matching lines...) |
| 3646 "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" | 3646 "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" |
| 3647 "vmovdqu %%ymm0,(%3,%2,1) \n" | 3647 "vmovdqu %%ymm0,(%3,%2,1) \n" |
| 3648 "lea 0x20(%2),%2 \n" | 3648 "lea 0x20(%2),%2 \n" |
| 3649 "sub $0x20,%4 \n" | 3649 "sub $0x20,%4 \n" |
| 3650 "jg 1b \n" | 3650 "jg 1b \n" |
| 3651 "vzeroupper \n" | 3651 "vzeroupper \n" |
| 3652 : "+r"(src0), // %0 | 3652 : "+r"(src0), // %0 |
| 3653 "+r"(src1), // %1 | 3653 "+r"(src1), // %1 |
| 3654 "+r"(alpha), // %2 | 3654 "+r"(alpha), // %2 |
| 3655 "+r"(dst), // %3 | 3655 "+r"(dst), // %3 |
| 3656 "+r"(width) // %4 | 3656 "+rm"(width) // %4 |
| 3657 :: "memory", "cc", "eax", | 3657 :: "memory", "cc", "eax", |
| 3658 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 3658 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
| 3659 ); | 3659 ); |
| 3660 } | 3660 } |
| 3661 #endif // HAS_BLENDPLANEROW_AVX2 | 3661 #endif // HAS_BLENDPLANEROW_AVX2 |
| 3662 | 3662 |
| 3663 #ifdef HAS_ARGBATTENUATEROW_SSSE3 | 3663 #ifdef HAS_ARGBATTENUATEROW_SSSE3 |
| 3664 // Shuffle table duplicating alpha | 3664 // Shuffle table duplicating alpha |
| 3665 static uvec8 kShuffleAlpha0 = { | 3665 static uvec8 kShuffleAlpha0 = { |
| 3666 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u | 3666 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u |
| (...skipping 1262 matching lines...) |
| 4929 // Blend 100 / 0 - Copy row unchanged. | 4929 // Blend 100 / 0 - Copy row unchanged. |
| 4930 LABELALIGN | 4930 LABELALIGN |
| 4931 "100: \n" | 4931 "100: \n" |
| 4932 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4932 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
| 4933 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4933 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
| 4934 "lea " MEMLEA(0x10,1) ",%1 \n" | 4934 "lea " MEMLEA(0x10,1) ",%1 \n" |
| 4935 "sub $0x10,%2 \n" | 4935 "sub $0x10,%2 \n" |
| 4936 "jg 100b \n" | 4936 "jg 100b \n" |
| 4937 | 4937 |
| 4938 "99: \n" | 4938 "99: \n" |
| 4939 : "+r"(dst_ptr), // %0 | 4939 : "+r"(dst_ptr), // %0 |
| 4940 "+r"(src_ptr), // %1 | 4940 "+r"(src_ptr), // %1 |
| 4941 "+r"(dst_width), // %2 | 4941 "+rm"(dst_width), // %2 |
| 4942 "+r"(source_y_fraction) // %3 | 4942 "+r"(source_y_fraction) // %3 |
| 4943 : "r"((intptr_t)(src_stride)) // %4 | 4943 : "r"((intptr_t)(src_stride)) // %4 |
| 4944 : "memory", "cc", "eax", NACL_R14 | 4944 : "memory", "cc", "eax", NACL_R14 |
| 4945 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 4945 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 4946 ); | 4946 ); |
| 4947 } | 4947 } |
| 4948 #endif // HAS_INTERPOLATEROW_SSSE3 | 4948 #endif // HAS_INTERPOLATEROW_SSSE3 |
| 4949 | 4949 |
| 4950 #ifdef HAS_INTERPOLATEROW_AVX2 | 4950 #ifdef HAS_INTERPOLATEROW_AVX2 |
| 4951 // Bilinear filter 32x2 -> 32x1 | 4951 // Bilinear filter 32x2 -> 32x1 |
| (...skipping 55 matching lines...) |
| 5007 LABELALIGN | 5007 LABELALIGN |
| 5008 "100: \n" | 5008 "100: \n" |
| 5009 "rep movsb " MEMMOVESTRING(1,0) " \n" | 5009 "rep movsb " MEMMOVESTRING(1,0) " \n" |
| 5010 "jmp 999f \n" | 5010 "jmp 999f \n" |
| 5011 | 5011 |
| 5012 "99: \n" | 5012 "99: \n" |
| 5013 "vzeroupper \n" | 5013 "vzeroupper \n" |
| 5014 "999: \n" | 5014 "999: \n" |
| 5015 : "+D"(dst_ptr), // %0 | 5015 : "+D"(dst_ptr), // %0 |
| 5016 "+S"(src_ptr), // %1 | 5016 "+S"(src_ptr), // %1 |
| 5017 "+c"(dst_width), // %2 | 5017 "+cm"(dst_width), // %2 |
| 5018 "+r"(source_y_fraction) // %3 | 5018 "+r"(source_y_fraction) // %3 |
| 5019 : "r"((intptr_t)(src_stride)) // %4 | 5019 : "r"((intptr_t)(src_stride)) // %4 |
| 5020 : "memory", "cc", "eax", NACL_R14 | 5020 : "memory", "cc", "eax", NACL_R14 |
| 5021 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" | 5021 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" |
| 5022 ); | 5022 ); |
| 5023 } | 5023 } |
| 5024 #endif // HAS_INTERPOLATEROW_AVX2 | 5024 #endif // HAS_INTERPOLATEROW_AVX2 |
| 5025 | 5025 |
| 5026 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 | 5026 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 |
| 5027 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. | 5027 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. |
| (...skipping 497 matching lines...) |
| 5525 ); | 5525 ); |
| 5526 } | 5526 } |
| 5527 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5527 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5528 | 5528 |
| 5529 #endif // defined(__x86_64__) || defined(__i386__) | 5529 #endif // defined(__x86_64__) || defined(__i386__) |
| 5530 | 5530 |
| 5531 #ifdef __cplusplus | 5531 #ifdef __cplusplus |
| 5532 } // extern "C" | 5532 } // extern "C" |
| 5533 } // namespace libyuv | 5533 } // namespace libyuv |
| 5534 #endif | 5534 #endif |
| OLD | NEW |