OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 3578 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3589 "psrlw $0x8,%%xmm0 \n" | 3589 "psrlw $0x8,%%xmm0 \n" |
3590 "packuswb %%xmm0,%%xmm0 \n" | 3590 "packuswb %%xmm0,%%xmm0 \n" |
3591 "movq %%xmm0,(%3,%2,1) \n" | 3591 "movq %%xmm0,(%3,%2,1) \n" |
3592 "lea 0x8(%2),%2 \n" | 3592 "lea 0x8(%2),%2 \n" |
3593 "sub $0x8,%4 \n" | 3593 "sub $0x8,%4 \n" |
3594 "jg 1b \n" | 3594 "jg 1b \n" |
3595 : "+r"(src0), // %0 | 3595 : "+r"(src0), // %0 |
3596 "+r"(src1), // %1 | 3596 "+r"(src1), // %1 |
3597 "+r"(alpha), // %2 | 3597 "+r"(alpha), // %2 |
3598 "+r"(dst), // %3 | 3598 "+r"(dst), // %3 |
3599 "+r"(width) // %4 | 3599 "+rm"(width) // %4 |
3600 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" | 3600 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" |
3601 ); | 3601 ); |
3602 } | 3602 } |
3603 #endif // HAS_BLENDPLANEROW_SSSE3 | 3603 #endif // HAS_BLENDPLANEROW_SSSE3 |
3604 | 3604 |
3605 #ifdef HAS_BLENDPLANEROW_AVX2 | 3605 #ifdef HAS_BLENDPLANEROW_AVX2 |
3606 // Blend 32 pixels at a time. | 3606 // Blend 32 pixels at a time. |
3607 // unsigned version of math | 3607 // unsigned version of math |
3608 // =((A2*C2)+(B2*(255-C2))+255)/256 | 3608 // =((A2*C2)+(B2*(255-C2))+255)/256 |
3609 // signed version of math | 3609 // signed version of math |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3646 "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" | 3646 "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" |
3647 "vmovdqu %%ymm0,(%3,%2,1) \n" | 3647 "vmovdqu %%ymm0,(%3,%2,1) \n" |
3648 "lea 0x20(%2),%2 \n" | 3648 "lea 0x20(%2),%2 \n" |
3649 "sub $0x20,%4 \n" | 3649 "sub $0x20,%4 \n" |
3650 "jg 1b \n" | 3650 "jg 1b \n" |
3651 "vzeroupper \n" | 3651 "vzeroupper \n" |
3652 : "+r"(src0), // %0 | 3652 : "+r"(src0), // %0 |
3653 "+r"(src1), // %1 | 3653 "+r"(src1), // %1 |
3654 "+r"(alpha), // %2 | 3654 "+r"(alpha), // %2 |
3655 "+r"(dst), // %3 | 3655 "+r"(dst), // %3 |
3656 "+r"(width) // %4 | 3656 "+rm"(width) // %4 |
3657 :: "memory", "cc", "eax", | 3657 :: "memory", "cc", "eax", |
3658 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 3658 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
3659 ); | 3659 ); |
3660 } | 3660 } |
3661 #endif // HAS_BLENDPLANEROW_AVX2 | 3661 #endif // HAS_BLENDPLANEROW_AVX2 |
3662 | 3662 |
3663 #ifdef HAS_ARGBATTENUATEROW_SSSE3 | 3663 #ifdef HAS_ARGBATTENUATEROW_SSSE3 |
3664 // Shuffle table duplicating alpha | 3664 // Shuffle table duplicating alpha |
3665 static uvec8 kShuffleAlpha0 = { | 3665 static uvec8 kShuffleAlpha0 = { |
3666 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u | 3666 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u |
(...skipping 1262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4929 // Blend 100 / 0 - Copy row unchanged. | 4929 // Blend 100 / 0 - Copy row unchanged. |
4930 LABELALIGN | 4930 LABELALIGN |
4931 "100: \n" | 4931 "100: \n" |
4932 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4932 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
4933 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4933 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
4934 "lea " MEMLEA(0x10,1) ",%1 \n" | 4934 "lea " MEMLEA(0x10,1) ",%1 \n" |
4935 "sub $0x10,%2 \n" | 4935 "sub $0x10,%2 \n" |
4936 "jg 100b \n" | 4936 "jg 100b \n" |
4937 | 4937 |
4938 "99: \n" | 4938 "99: \n" |
4939 : "+r"(dst_ptr), // %0 | 4939 : "+r"(dst_ptr), // %0 |
4940 "+r"(src_ptr), // %1 | 4940 "+r"(src_ptr), // %1 |
4941 "+r"(dst_width), // %2 | 4941 "+rm"(dst_width), // %2 |
4942 "+r"(source_y_fraction) // %3 | 4942 "+r"(source_y_fraction) // %3 |
4943 : "r"((intptr_t)(src_stride)) // %4 | 4943 : "r"((intptr_t)(src_stride)) // %4 |
4944 : "memory", "cc", "eax", NACL_R14 | 4944 : "memory", "cc", "eax", NACL_R14 |
4945 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 4945 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
4946 ); | 4946 ); |
4947 } | 4947 } |
4948 #endif // HAS_INTERPOLATEROW_SSSE3 | 4948 #endif // HAS_INTERPOLATEROW_SSSE3 |
4949 | 4949 |
4950 #ifdef HAS_INTERPOLATEROW_AVX2 | 4950 #ifdef HAS_INTERPOLATEROW_AVX2 |
4951 // Bilinear filter 32x2 -> 32x1 | 4951 // Bilinear filter 32x2 -> 32x1 |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5007 LABELALIGN | 5007 LABELALIGN |
5008 "100: \n" | 5008 "100: \n" |
5009 "rep movsb " MEMMOVESTRING(1,0) " \n" | 5009 "rep movsb " MEMMOVESTRING(1,0) " \n" |
5010 "jmp 999f \n" | 5010 "jmp 999f \n" |
5011 | 5011 |
5012 "99: \n" | 5012 "99: \n" |
5013 "vzeroupper \n" | 5013 "vzeroupper \n" |
5014 "999: \n" | 5014 "999: \n" |
5015 : "+D"(dst_ptr), // %0 | 5015 : "+D"(dst_ptr), // %0 |
5016 "+S"(src_ptr), // %1 | 5016 "+S"(src_ptr), // %1 |
5017 "+c"(dst_width), // %2 | 5017 "+cm"(dst_width), // %2 |
5018 "+r"(source_y_fraction) // %3 | 5018 "+r"(source_y_fraction) // %3 |
5019 : "r"((intptr_t)(src_stride)) // %4 | 5019 : "r"((intptr_t)(src_stride)) // %4 |
5020 : "memory", "cc", "eax", NACL_R14 | 5020 : "memory", "cc", "eax", NACL_R14 |
5021 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" | 5021 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" |
5022 ); | 5022 ); |
5023 } | 5023 } |
5024 #endif // HAS_INTERPOLATEROW_AVX2 | 5024 #endif // HAS_INTERPOLATEROW_AVX2 |
5025 | 5025 |
5026 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 | 5026 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 |
5027 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. | 5027 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. |
(...skipping 497 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5525 ); | 5525 ); |
5526 } | 5526 } |
5527 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5527 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5528 | 5528 |
5529 #endif // defined(__x86_64__) || defined(__i386__) | 5529 #endif // defined(__x86_64__) || defined(__i386__) |
5530 | 5530 |
5531 #ifdef __cplusplus | 5531 #ifdef __cplusplus |
5532 } // extern "C" | 5532 } // extern "C" |
5533 } // namespace libyuv | 5533 } // namespace libyuv |
5534 #endif | 5534 #endif |
OLD | NEW |