| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/basic_types.h" | 11 #include "libyuv/basic_types.h" |
| 12 #include "libyuv/row.h" | 12 #include "libyuv/row.h" |
| 13 | 13 |
| 14 #ifdef __cplusplus | 14 #ifdef __cplusplus |
| 15 namespace libyuv { | 15 namespace libyuv { |
| 16 extern "C" { | 16 extern "C" { |
| 17 #endif | 17 #endif |
| 18 | 18 |
| 19 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) | 19 // This module is for Visual C x86. |
| 20 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ |
| 21 defined(_MSC_VER) && !defined(__clang__) |
| 20 | 22 |
| 21 __declspec(naked) __declspec(align(16)) | 23 __declspec(naked) |
| 22 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { | 24 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { |
| 23 __asm { | 25 __asm { |
| 24 mov eax, [esp + 4] // src_a | 26 mov eax, [esp + 4] // src_a |
| 25 mov edx, [esp + 8] // src_b | 27 mov edx, [esp + 8] // src_b |
| 26 mov ecx, [esp + 12] // count | 28 mov ecx, [esp + 12] // count |
| 27 pxor xmm0, xmm0 | 29 pxor xmm0, xmm0 |
| 28 pxor xmm5, xmm5 | 30 pxor xmm5, xmm5 |
| 29 | 31 |
| 30 wloop: | 32 wloop: |
| 31 movdqu xmm1, [eax] | 33 movdqu xmm1, [eax] |
| (...skipping 20 matching lines...) Expand all Loading... |
| 52 paddd xmm0, xmm1 | 54 paddd xmm0, xmm1 |
| 53 movd eax, xmm0 | 55 movd eax, xmm0 |
| 54 ret | 56 ret |
| 55 } | 57 } |
| 56 } | 58 } |
| 57 | 59 |
| 58 // Visual C 2012 required for AVX2. | 60 // Visual C 2012 required for AVX2. |
| 59 #if _MSC_VER >= 1700 | 61 #if _MSC_VER >= 1700 |
| 60 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. | 62 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. |
| 61 #pragma warning(disable: 4752) | 63 #pragma warning(disable: 4752) |
| 62 __declspec(naked) __declspec(align(16)) | 64 __declspec(naked) |
| 63 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { | 65 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { |
| 64 __asm { | 66 __asm { |
| 65 mov eax, [esp + 4] // src_a | 67 mov eax, [esp + 4] // src_a |
| 66 mov edx, [esp + 8] // src_b | 68 mov edx, [esp + 8] // src_b |
| 67 mov ecx, [esp + 12] // count | 69 mov ecx, [esp + 12] // count |
| 68 vpxor ymm0, ymm0, ymm0 // sum | 70 vpxor ymm0, ymm0, ymm0 // sum |
| 69 vpxor ymm5, ymm5, ymm5 // constant 0 for unpck | 71 vpxor ymm5, ymm5, ymm5 // constant 0 for unpck |
| 70 sub edx, eax | 72 sub edx, eax |
| 71 | 73 |
| 72 wloop: | 74 wloop: |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 126 }; | 128 }; |
| 127 | 129 |
| 128 // 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 | 130 // 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 |
| 129 // 44: 66 0F 38 40 DD pmulld xmm3,xmm5 | 131 // 44: 66 0F 38 40 DD pmulld xmm3,xmm5 |
| 130 // 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 | 132 // 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 |
| 131 // 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 | 133 // 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 |
| 132 // 83: 66 0F 38 40 CD pmulld xmm1,xmm5 | 134 // 83: 66 0F 38 40 CD pmulld xmm1,xmm5 |
| 133 #define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ | 135 #define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ |
| 134 _asm _emit 0x40 _asm _emit reg | 136 _asm _emit 0x40 _asm _emit reg |
| 135 | 137 |
| 136 __declspec(naked) __declspec(align(16)) | 138 __declspec(naked) |
| 137 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { | 139 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { |
| 138 __asm { | 140 __asm { |
| 139 mov eax, [esp + 4] // src | 141 mov eax, [esp + 4] // src |
| 140 mov ecx, [esp + 8] // count | 142 mov ecx, [esp + 8] // count |
| 141 movd xmm0, [esp + 12] // seed | 143 movd xmm0, [esp + 12] // seed |
| 142 | 144 |
| 143 pxor xmm7, xmm7 // constant 0 for unpck | 145 pxor xmm7, xmm7 // constant 0 for unpck |
| 144 movdqa xmm6, kHash16x33 | 146 movdqa xmm6, kHash16x33 |
| 145 | 147 |
| 146 wloop: | 148 wloop: |
| (...skipping 30 matching lines...) Expand all Loading... |
| 177 sub ecx, 16 | 179 sub ecx, 16 |
| 178 jg wloop | 180 jg wloop |
| 179 | 181 |
| 180 movd eax, xmm0 // return hash | 182 movd eax, xmm0 // return hash |
| 181 ret | 183 ret |
| 182 } | 184 } |
| 183 } | 185 } |
| 184 | 186 |
| 185 // Visual C 2012 required for AVX2. | 187 // Visual C 2012 required for AVX2. |
| 186 #if _MSC_VER >= 1700 | 188 #if _MSC_VER >= 1700 |
| 187 __declspec(naked) __declspec(align(16)) | 189 __declspec(naked) |
| 188 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { | 190 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { |
| 189 __asm { | 191 __asm { |
| 190 mov eax, [esp + 4] // src | 192 mov eax, [esp + 4] // src |
| 191 mov ecx, [esp + 8] // count | 193 mov ecx, [esp + 8] // count |
| 192 movd xmm0, [esp + 12] // seed | 194 movd xmm0, [esp + 12] // seed |
| 193 movdqa xmm6, kHash16x33 | 195 movdqa xmm6, kHash16x33 |
| 194 | 196 |
| 195 wloop: | 197 wloop: |
| 196 vpmovzxbd xmm3, dword ptr [eax] // src[0-3] | 198 vpmovzxbd xmm3, dword ptr [eax] // src[0-3] |
| 197 pmulld xmm0, xmm6 // hash *= 33 ^ 16 | 199 pmulld xmm0, xmm6 // hash *= 33 ^ 16 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 212 paddd xmm1, xmm2 | 214 paddd xmm1, xmm2 |
| 213 paddd xmm0, xmm1 | 215 paddd xmm0, xmm1 |
| 214 sub ecx, 16 | 216 sub ecx, 16 |
| 215 jg wloop | 217 jg wloop |
| 216 | 218 |
| 217 movd eax, xmm0 // return hash | 219 movd eax, xmm0 // return hash |
| 218 ret | 220 ret |
| 219 } | 221 } |
| 220 } | 222 } |
| 221 #endif // _MSC_VER >= 1700 | 223 #endif // _MSC_VER >= 1700 |
| 222 | 224 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) |
| 223 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) | |
| 224 | 225 |
| 225 #ifdef __cplusplus | 226 #ifdef __cplusplus |
| 226 } // extern "C" | 227 } // extern "C" |
| 227 } // namespace libyuv | 228 } // namespace libyuv |
| 228 #endif | 229 #endif |
| OLD | NEW |