Chromium Code Reviews

Diff: source/row_win.cc

Issue 1420033004: remove store bgra, abgr, raw unused macros. (Closed)
Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 1 month ago
/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

(...skipping 66 matching lines...)
#define STOREARGB \
    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); \
    xmm1 = _mm_loadu_si128(&xmm0); \
    xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); \
    xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \
    _mm_storeu_si128((__m128i *)dst_argb, xmm0); \
    _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \
    dst_argb += 32;
- // Store 8 ABGR values.
- #define STOREABGR \
-     xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
-     xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \
-     xmm1 = _mm_loadu_si128(&xmm2); \
-     xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \
-     xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \
-     _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \
-     _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \
-     dst_abgr += 32;

#if defined(HAS_I422TOARGBROW_SSSE3)
void I422ToARGBRow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  __m128i xmm0, xmm1, xmm2, xmm4;
  const __m128i xmm5 = _mm_set1_epi8(-1);
(...skipping 1910 matching lines...)
    __asm vpermq     ymm0, ymm0, 0xd8 \
    __asm vpunpcklbw ymm2, ymm2, ymm5 /* RA */ \
    __asm vpermq     ymm2, ymm2, 0xd8 \
    __asm vpunpcklwd ymm1, ymm0, ymm2 /* BGRA first 8 pixels */ \
    __asm vpunpckhwd ymm0, ymm0, ymm2 /* BGRA next 8 pixels */ \
    __asm vmovdqu    0[edx], ymm1 \
    __asm vmovdqu    32[edx], ymm0 \
    __asm lea        edx, [edx + 64] \
  }

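The vpermq ..., 0xd8 steps above compensate for AVX2 unpacks operating within 128-bit lanes: reordering quadwords as [0,2,1,3] lets the following per-lane word unpacks emit all 16 pixels in sequential memory order. A hedged intrinsics sketch of the same idea (illustrative, not libyuv's code; it assumes each input register holds values 0-7 in lane 0 and 8-15 in lane 1, the layout left by the vpackuswb at the end of YUVTORGB_AVX2):

#include <immintrin.h>
#include <stdint.h>

static void Store16ARGB(__m256i b, __m256i g, __m256i r, __m256i a,
                        uint8_t* dst_argb) {
  __m256i bg = _mm256_unpacklo_epi8(b, g);     /* per-lane B,G pairs */
  __m256i ra = _mm256_unpacklo_epi8(r, a);     /* per-lane R,A pairs */
  bg = _mm256_permute4x64_epi64(bg, 0xd8);     /* pairs 0-3,8-11 | 4-7,12-15 */
  ra = _mm256_permute4x64_epi64(ra, 0xd8);
  __m256i p0 = _mm256_unpacklo_epi16(bg, ra);  /* pixels 0-7, in order */
  __m256i p1 = _mm256_unpackhi_epi16(bg, ra);  /* pixels 8-15 */
  _mm256_storeu_si256((__m256i*)dst_argb, p0);
  _mm256_storeu_si256((__m256i*)(dst_argb + 32), p1);
}
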
- // Store 16 BGRA values.
- #define STOREBGRA_AVX2 __asm { \
-     __asm vpunpcklbw ymm1, ymm1, ymm0 /* GB */ \
-     __asm vpermq     ymm1, ymm1, 0xd8 \
-     __asm vpunpcklbw ymm2, ymm5, ymm2 /* AR */ \
-     __asm vpermq     ymm2, ymm2, 0xd8 \
-     __asm vpunpcklwd ymm0, ymm2, ymm1 /* ARGB first 8 pixels */ \
-     __asm vpunpckhwd ymm2, ymm2, ymm1 /* ARGB next 8 pixels */ \
-     __asm vmovdqu    [edx], ymm0 \
-     __asm vmovdqu    [edx + 32], ymm2 \
-     __asm lea        edx, [edx + 64] \
-   }

// Store 16 RGBA values.
#define STORERGBA_AVX2 __asm { \
    __asm vpunpcklbw ymm1, ymm1, ymm2 /* GR */ \
    __asm vpermq     ymm1, ymm1, 0xd8 \
    __asm vpunpcklbw ymm2, ymm5, ymm0 /* AB */ \
    __asm vpermq     ymm2, ymm2, 0xd8 \
    __asm vpunpcklwd ymm0, ymm2, ymm1 /* ABGR first 8 pixels */ \
    __asm vpunpckhwd ymm1, ymm2, ymm1 /* ABGR next 8 pixels */ \
    __asm vmovdqu    [edx], ymm0 \
    __asm vmovdqu    [edx + 32], ymm1 \
    __asm lea        edx, [edx + 64] \
  }

- // Store 16 ABGR values.
- #define STOREABGR_AVX2 __asm { \
-     __asm vpunpcklbw ymm1, ymm2, ymm1 /* RG */ \
-     __asm vpermq     ymm1, ymm1, 0xd8 \
-     __asm vpunpcklbw ymm2, ymm0, ymm5 /* BA */ \
-     __asm vpermq     ymm2, ymm2, 0xd8 \
-     __asm vpunpcklwd ymm0, ymm1, ymm2 /* RGBA first 8 pixels */ \
-     __asm vpunpckhwd ymm1, ymm1, ymm2 /* RGBA next 8 pixels */ \
-     __asm vmovdqu    [edx], ymm0 \
-     __asm vmovdqu    [edx + 32], ymm1 \
-     __asm lea        edx, [edx + 64] \
-   }

#ifdef HAS_I422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
void I422ToARGBRow_AVX2(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width) {
(...skipping 453 matching lines...)
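For orientation, this is what the I422To*Row functions compute per pixel before the store macros weave the channels: a scalar reference using the classic BT.601 integer coefficients (an assumption for illustration; libyuv's actual constants come from the YuvConstants tables):

#include <stdint.h>

static uint8_t Clamp255(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* One YUV pixel to ARGB bytes (B,G,R,A in memory). */
static void YuvToArgbPixel(uint8_t y, uint8_t u, uint8_t v,
                           uint8_t* argb) {
  int c = ((int)y - 16) * 298;
  int d = (int)u - 128;
  int e = (int)v - 128;
  argb[0] = Clamp255((c + 516 * d + 128) >> 8);            /* B */
  argb[1] = Clamp255((c - 100 * d - 208 * e + 128) >> 8);  /* G */
  argb[2] = Clamp255((c + 409 * e + 128) >> 8);            /* R */
  argb[3] = 255;                                           /* A */
}
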
    __asm punpcklbw  xmm1, xmm0 /* GB */ \
    __asm punpcklbw  xmm5, xmm2 /* AR */ \
    __asm movdqa     xmm0, xmm5 \
    __asm punpcklwd  xmm5, xmm1 /* BGRA first 4 pixels */ \
    __asm punpckhwd  xmm0, xmm1 /* BGRA next 4 pixels */ \
    __asm movdqu     0[edx], xmm5 \
    __asm movdqu     16[edx], xmm0 \
    __asm lea        edx, [edx + 32] \
  }

- // Store 8 ABGR values.
- #define STOREABGR __asm { \
-     __asm punpcklbw  xmm2, xmm1 /* RG */ \
-     __asm punpcklbw  xmm0, xmm5 /* BA */ \
-     __asm movdqa     xmm1, xmm2 \
-     __asm punpcklwd  xmm2, xmm0 /* RGBA first 4 pixels */ \
-     __asm punpckhwd  xmm1, xmm0 /* RGBA next 4 pixels */ \
-     __asm movdqu     0[edx], xmm2 \
-     __asm movdqu     16[edx], xmm1 \
-     __asm lea        edx, [edx + 32] \
-   }

// Store 8 RGBA values.
#define STORERGBA __asm { \
    __asm pcmpeqb    xmm5, xmm5 /* generate 0xffffffff for alpha */ \
    __asm punpcklbw  xmm1, xmm2 /* GR */ \
    __asm punpcklbw  xmm5, xmm0 /* AB */ \
    __asm movdqa     xmm0, xmm5 \
    __asm punpcklwd  xmm5, xmm1 /* RGBA first 4 pixels */ \
    __asm punpckhwd  xmm0, xmm1 /* RGBA next 4 pixels */ \
    __asm movdqu     0[edx], xmm5 \
    __asm movdqu     16[edx], xmm0 \
(...skipping 10 matching lines...)
    __asm punpckhwd  xmm1, xmm2 /* BGRR next 4 pixels */ \
    /* RRGB -> RGB24 */ \
    __asm pshufb     xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \
    __asm pshufb     xmm1, xmm6 /* Pack first 12 bytes. */ \
    __asm palignr    xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \
    __asm movq       qword ptr 0[edx], xmm0 /* First 8 bytes */ \
    __asm movdqu     8[edx], xmm1 /* Last 16 bytes */ \
    __asm lea        edx, [edx + 24] \
  }

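The pshufb pair above packs 4-byte pixels down to 3 bytes each. A simplified sketch of the core idea with SSSE3 intrinsics (a hypothetical standalone helper, not the macro itself; unlike the macro's careful movq/movdqu split, it writes a full 16 bytes per 4 pixels, so the destination needs 4 bytes of slack):

#include <tmmintrin.h>
#include <stdint.h>

/* Drop the 4th byte of each of 4 pixels: 16 bytes ARGB -> 12 bytes RGB24. */
static void Pack4PixelsTo24(const uint8_t* src_argb, uint8_t* dst_rgb24) {
  const __m128i kShuffle =
      _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,
                    -128, -128, -128, -128);  /* 0x80 lanes produce zeros */
  __m128i p = _mm_loadu_si128((const __m128i*)src_argb);
  p = _mm_shuffle_epi8(p, kShuffle);          /* 12 packed bytes + 4 zeros */
  _mm_storeu_si128((__m128i*)dst_rgb24, p);   /* caller allows 4 bytes slack */
}
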
- // Store 8 RAW values.
- #define STORERAW __asm { \
-     /* Weave into RRGB */ \
-     __asm punpcklbw  xmm0, xmm1 /* BG */ \
-     __asm punpcklbw  xmm2, xmm2 /* RR */ \
-     __asm movdqa     xmm1, xmm0 \
-     __asm punpcklwd  xmm0, xmm2 /* BGRR first 4 pixels */ \
-     __asm punpckhwd  xmm1, xmm2 /* BGRR next 4 pixels */ \
-     /* RRGB -> RAW */ \
-     __asm pshufb     xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \
-     __asm pshufb     xmm1, xmm6 /* Pack first 12 bytes. */ \
-     __asm palignr    xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \
-     __asm movq       qword ptr 0[edx], xmm0 /* First 8 bytes */ \
-     __asm movdqu     8[edx], xmm1 /* Last 16 bytes */ \
-     __asm lea        edx, [edx + 24] \
-   }

// Store 8 RGB565 values.
#define STORERGB565 __asm { \
    /* Weave into RRGB */ \
    __asm punpcklbw  xmm0, xmm1 /* BG */ \
    __asm punpcklbw  xmm2, xmm2 /* RR */ \
    __asm movdqa     xmm1, xmm0 \
    __asm punpcklwd  xmm0, xmm2 /* BGRR first 4 pixels */ \
    __asm punpckhwd  xmm1, xmm2 /* BGRR next 4 pixels */ \
    /* RRGB -> RGB565 */ \
    __asm movdqa     xmm3, xmm0 /* B first 4 pixels of argb */ \
(...skipping 3644 matching lines...)
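For reference, the 5:6:5 packing that the rest of STORERGB565 performs with vector shifts and masks, written as a scalar sketch (it truncates the low bits rather than rounding, matching the shift-and-mask approach):

#include <stdint.h>

/* Pack 8:8:8 RGB into a 16-bit 5:6:5 value. */
static inline uint16_t PackRGB565(uint8_t r, uint8_t g, uint8_t b) {
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}
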
}
#endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3

#endif  // defined(_M_X64)
#endif  // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif