OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 3358 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3369 #endif // HAS_MERGEUVROW_AVX2 | 3369 #endif // HAS_MERGEUVROW_AVX2 |
3370 | 3370 |
3371 #ifdef HAS_COPYROW_SSE2 | 3371 #ifdef HAS_COPYROW_SSE2 |
3372 // CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time. | 3372 // CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time. |
// The arguments are read from the stack at [esp + 4], [esp + 8] and
// [esp + 12] into eax (src), edx (dst) and ecx (count).
// OLD side: a single loop that always uses movdqu.
// NEW side: two loops; convertloopa uses movdqa and convertloopu uses movdqu.
// Each iteration moves 32 bytes (two xmm registers) and advances both pointers
// by 32. The loop condition (sub ecx, 32 / jg) is evaluated only after the
// copy, so at least 32 bytes are always written and a count that is not a
// multiple of 32 is effectively rounded up to the next multiple.
// NOTE(review): presumably callers guarantee that count is a positive multiple
// of 32 and that both buffers are large enough -- confirm against call sites.
3373 __declspec(naked) | 3373 __declspec(naked) |
3374 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { | 3374 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { |
3375 __asm { | 3375 __asm { |
3376 mov eax, [esp + 4] // src | 3376 mov eax, [esp + 4] // src |
3377 mov edx, [esp + 8] // dst | 3377 mov edx, [esp + 8] // dst |
3378 mov ecx, [esp + 12] // count | 3378 mov ecx, [esp + 12] // count |
// NEW side: if any of the low 4 bits of src or dst is set (pointer is not a
// multiple of 16), branch to the movdqu loop; otherwise fall through to the
// movdqa loop below.
| 3379 test eax, 15 |
| 3380 jne convertloopu |
| 3381 test edx, 15 |
| 3382 jne convertloopu |
3379 | 3383 |
// NEW side: loop taken when both pointers passed the test above.
3380 convertloop: | 3384 convertloopa: |
| 3385 movdqa xmm0, [eax] |
| 3386 movdqa xmm1, [eax + 16] |
| 3387 lea eax, [eax + 32] |
| 3388 movdqa [edx], xmm0 |
| 3389 movdqa [edx + 16], xmm1 |
| 3390 lea edx, [edx + 32] |
| 3391 sub ecx, 32 |
| 3392 jg convertloopa |
| 3393 ret |
| 3394 |
// NEW side: loop taken when either pointer failed the test above. On the OLD
// side the instructions below are the body of the single 'convertloop'.
| 3395 convertloopu: |
3381 movdqu xmm0, [eax] | 3396 movdqu xmm0, [eax] |
3382 movdqu xmm1, [eax + 16] | 3397 movdqu xmm1, [eax + 16] |
3383 lea eax, [eax + 32] | 3398 lea eax, [eax + 32] |
3384 movdqu [edx], xmm0 | 3399 movdqu [edx], xmm0 |
3385 movdqu [edx + 16], xmm1 | 3400 movdqu [edx + 16], xmm1 |
3386 lea edx, [edx + 32] | 3401 lea edx, [edx + 32] |
3387 sub ecx, 32 | 3402 sub ecx, 32 |
3388 jg convertloop | 3403 jg convertloopu |
3389 ret | 3404 ret |
3390 } | 3405 } |
3391 } | 3406 } |
3392 #endif // HAS_COPYROW_SSE2 | 3407 #endif // HAS_COPYROW_SSE2 |
3393 | 3408 |
3394 #ifdef HAS_COPYROW_AVX | 3409 #ifdef HAS_COPYROW_AVX |
3395 // CopyRow copies 'count' bytes using a 32 byte load/store, 64 bytes at a time. | 3410 // CopyRow copies 'count' bytes using a 32 byte load/store, 64 bytes at a time. |
3396 __declspec(naked) | 3411 __declspec(naked) |
3397 void CopyRow_AVX(const uint8* src, uint8* dst, int count) { | 3412 void CopyRow_AVX(const uint8* src, uint8* dst, int count) { |
3398 __asm { | 3413 __asm { |
(...skipping 2829 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6228 } | 6243 } |
6229 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6244 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6230 | 6245 |
6231 #endif // defined(_M_X64) | 6246 #endif // defined(_M_X64) |
6232 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6247 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6233 | 6248 |
6234 #ifdef __cplusplus | 6249 #ifdef __cplusplus |
6235 } // extern "C" | 6250 } // extern "C" |
6236 } // namespace libyuv | 6251 } // namespace libyuv |
6237 #endif | 6252 #endif |
OLD | NEW |