Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(137)

Side by Side Diff: source/row_win.cc

Issue 1455463002: test for unaligned vs aligned for CopyRow_SSE2 (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 3358 matching lines...) Expand 10 before | Expand all | Expand 10 after
3369 #endif // HAS_MERGEUVROW_AVX2 3369 #endif // HAS_MERGEUVROW_AVX2
3370 3370
3371 #ifdef HAS_COPYROW_SSE2 3371 #ifdef HAS_COPYROW_SSE2
3372 // CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time. 3372 // CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time.
3373 __declspec(naked) 3373 __declspec(naked)
3374 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { 3374 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
3375 __asm { 3375 __asm {
3376 mov eax, [esp + 4] // src 3376 mov eax, [esp + 4] // src
3377 mov edx, [esp + 8] // dst 3377 mov edx, [esp + 8] // dst
3378 mov ecx, [esp + 12] // count 3378 mov ecx, [esp + 12] // count
3379 test eax, 15
3380 jne convertloopu
3381 test edx, 15
3382 jne convertloopu
3379 3383
3380 convertloop: 3384 convertloopa:
3385 movdqa xmm0, [eax]
3386 movdqa xmm1, [eax + 16]
3387 lea eax, [eax + 32]
3388 movdqa [edx], xmm0
3389 movdqa [edx + 16], xmm1
3390 lea edx, [edx + 32]
3391 sub ecx, 32
3392 jg convertloopa
3393 ret
3394
3395 convertloopu:
3381 movdqu xmm0, [eax] 3396 movdqu xmm0, [eax]
3382 movdqu xmm1, [eax + 16] 3397 movdqu xmm1, [eax + 16]
3383 lea eax, [eax + 32] 3398 lea eax, [eax + 32]
3384 movdqu [edx], xmm0 3399 movdqu [edx], xmm0
3385 movdqu [edx + 16], xmm1 3400 movdqu [edx + 16], xmm1
3386 lea edx, [edx + 32] 3401 lea edx, [edx + 32]
3387 sub ecx, 32 3402 sub ecx, 32
3388 jg convertloop 3403 jg convertloopu
3389 ret 3404 ret
3390 } 3405 }
3391 } 3406 }
3392 #endif // HAS_COPYROW_SSE2 3407 #endif // HAS_COPYROW_SSE2
3393 3408
3394 #ifdef HAS_COPYROW_AVX 3409 #ifdef HAS_COPYROW_AVX
3395 // CopyRow copies 'count' bytes using a 32 byte load/store, 64 bytes at a time. 3410 // CopyRow copies 'count' bytes using a 32 byte load/store, 64 bytes at a time.
3396 __declspec(naked) 3411 __declspec(naked)
3397 void CopyRow_AVX(const uint8* src, uint8* dst, int count) { 3412 void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
3398 __asm { 3413 __asm {
(...skipping 2829 matching lines...) Expand 10 before | Expand all | Expand 10 after
6228 } 6243 }
6229 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6244 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6230 6245
6231 #endif // defined(_M_X64) 6246 #endif // defined(_M_X64)
6232 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6247 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6233 6248
6234 #ifdef __cplusplus 6249 #ifdef __cplusplus
6235 } // extern "C" 6250 } // extern "C"
6236 } // namespace libyuv 6251 } // namespace libyuv
6237 #endif 6252 #endif
OLD NEW
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698