OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 4045 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4056 lea edx, [edx + 8] | 4056 lea edx, [edx + 8] |
4057 sub ecx, 16 | 4057 sub ecx, 16 |
4058 jg convertloop | 4058 jg convertloop |
4059 | 4059 |
4060 pop edi | 4060 pop edi |
4061 ret | 4061 ret |
4062 } | 4062 } |
4063 } | 4063 } |
4064 #endif // HAS_YUY2TOYROW_SSE2 | 4064 #endif // HAS_YUY2TOYROW_SSE2 |
4065 | 4065 |
| 4066 #ifdef HAS_BLENDPLANEROW_SSSE3 |
| 4067 // Blend 8 pixels at a time: dst = (src0 * a + src1 * (255 - a) + 255) >> 8, where a = alpha. |
| 4068 // Each pass reads and writes a full 8 bytes and the loop runs at least once; width should be a positive multiple of 8. |
| 4069 __declspec(naked) |
| 4070 void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, |
| 4071 const uint8* alpha, uint8* dst, int width) { |
| 4072 __asm { |
| 4073 push esi |
| 4074 push edi |
| 4075 pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 |
| 4076 psllw xmm5, 8 |
| 4077 mov eax, 0x80808080 // 128 for biasing image to signed. |
| 4078 movd xmm6, eax |
| 4079 pshufd xmm6, xmm6, 0x00 |
| 4080 |
| 4081 mov eax, 0x807f807f // 32768 + 127 for unbias and round. |
| 4082 movd xmm7, eax |
| 4083 pshufd xmm7, xmm7, 0x00 |
| 4084 mov eax, [esp + 8 + 4] // src0 (the + 8 skips the saved esi and edi) |
| 4085 mov edx, [esp + 8 + 8] // src1 |
| 4086 mov esi, [esp + 8 + 12] // alpha |
| 4087 mov edi, [esp + 8 + 16] // dst |
| 4088 mov ecx, [esp + 8 + 20] // width |
| 4089 sub eax, esi // src0, src1 and dst become offsets from alpha, |
| 4090 sub edx, esi // so advancing esi alone steps all four pointers. |
| 4091 sub edi, esi |
| 4092 |
| 4093 // 8 pixel loop. |
| 4094 convertloop8: |
| 4095 movq xmm0, qword ptr [esi] // alpha |
| 4096 punpcklbw xmm0, xmm0 |
| 4097 pxor xmm0, xmm5 // a, 255-a |
| 4098 movq xmm1, qword ptr [eax + esi] // src0 |
| 4099 movq xmm2, qword ptr [edx + esi] // src1 |
| 4100 punpcklbw xmm1, xmm2 |
| 4101 psubb xmm1, xmm6 // bias src0/1 - 128 |
| 4102 pmaddubsw xmm0, xmm1 // a * (src0 - 128) + (255 - a) * (src1 - 128) |
| 4103 paddw xmm0, xmm7 // unbias result - 32768 and round. |
| 4104 psrlw xmm0, 8 |
| 4105 packuswb xmm0, xmm0 |
| 4106 movq qword ptr [edi + esi], xmm0 |
| 4107 lea esi, [esi + 8] |
| 4108 sub ecx, 8 |
| 4109 jg convertloop8 // jg, not jge: jge looped once more when the count hit exactly 0. |
| 4110 |
| 4111 pop edi |
| 4112 pop esi |
| 4113 ret |
| 4114 } |
| 4115 } |
| 4116 #endif // HAS_BLENDPLANEROW_SSSE3 |
| 4117 |
4066 #ifdef HAS_ARGBBLENDROW_SSSE3 | 4118 #ifdef HAS_ARGBBLENDROW_SSSE3 |
4067 // Shuffle table for isolating alpha: picks source bytes 3, 7, 11 and 15, each twice, with 0x80 in every odd lane (presumably a pshufb mask where 0x80 zeroes the byte; confirm at the use site). | 4119 // Shuffle table for isolating alpha: picks source bytes 3, 7, 11 and 15, each twice, with 0x80 in every odd lane (presumably a pshufb mask where 0x80 zeroes the byte; confirm at the use site). |
4068 static const uvec8 kShuffleAlpha = { | 4120 static const uvec8 kShuffleAlpha = { |
4069 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, | 4121 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, |
4070 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 | 4122 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 |
4071 }; | 4123 }; |
4072 | 4124 |
4073 // Blend 8 pixels at a time. | 4125 // Blend 8 pixels at a time. |
4074 __declspec(naked) | 4126 __declspec(naked) |
4075 void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, | 4127 void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, |
(...skipping 2174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6250 } | 6302 } |
6251 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6303 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6252 | 6304 |
6253 #endif // defined(_M_X64) | 6305 #endif // defined(_M_X64) |
6254 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6306 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6255 | 6307 |
6256 #ifdef __cplusplus | 6308 #ifdef __cplusplus |
6257 } // extern "C" | 6309 } // extern "C" |
6258 } // namespace libyuv | 6310 } // namespace libyuv |
6259 #endif | 6311 #endif |
OLD | NEW |