Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(74)

Side by Side Diff: source/row_win.cc

Issue 1359183003: win64 version of I422AlphaToARGB (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: bump version Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 19 matching lines...) Expand all
30 30
31 // Read 4 UV from 422, upsample to 8 UV. 31 // Read 4 UV from 422, upsample to 8 UV.
32 #define READYUV422 \ 32 #define READYUV422 \
33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
37 u_buf += 4; \ 37 u_buf += 4; \
38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ 38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ 39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
40 y_buf += 8;
41
42 // Read 4 UV from 422, upsample to 8 UV. Also read 8 alpha bytes.
43 #define READYUVA422 \
44 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
45 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
46 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
47 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
48 u_buf += 4; \
49 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
50 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
40 y_buf += 8; \ 51 y_buf += 8; \
52 xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \
53 a_buf += 8;
41 54
42 // Convert 8 pixels: 8 UV and 8 Y. 55 // Convert 8 pixels: 8 UV and 8 Y.
43 #define YUVTORGB(yuvconstants) \ 56 #define YUVTORGB(yuvconstants) \
44 xmm1 = _mm_loadu_si128(&xmm0); \ 57 xmm1 = _mm_loadu_si128(&xmm0); \
45 xmm2 = _mm_loadu_si128(&xmm0); \ 58 xmm2 = _mm_loadu_si128(&xmm0); \
46 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \ 59 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \
47 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \ 60 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \
48 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \ 61 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \
49 xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \ 62 xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \
50 xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \ 63 xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \
(...skipping 20 matching lines...) Expand all
71 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ 84 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \
72 dst_argb += 32; 85 dst_argb += 32;
73 86
74 // Store 8 ABGR values. 87 // Store 8 ABGR values.
75 #define STOREABGR \ 88 #define STOREABGR \
76 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ 89 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \ 90 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \
78 xmm1 = _mm_loadu_si128(&xmm2); \ 91 xmm1 = _mm_loadu_si128(&xmm2); \
79 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \ 92 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \
80 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \ 93 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \
81 _mm_storeu_si128((__m128i *)dst_argb, xmm2); \ 94 _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \
82 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ 95 _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \
83 dst_argb += 32; 96 dst_abgr += 32;
84 97
85 98
86 #if defined(HAS_I422TOARGBROW_SSSE3) 99 #if defined(HAS_I422TOARGBROW_SSSE3)
87 void I422ToARGBRow_SSSE3(const uint8* y_buf, 100 void I422ToARGBRow_SSSE3(const uint8* y_buf,
88 const uint8* u_buf, 101 const uint8* u_buf,
89 const uint8* v_buf, 102 const uint8* v_buf,
90 uint8* dst_argb, 103 uint8* dst_argb,
91 struct YuvConstants* yuvconstants, 104 struct YuvConstants* yuvconstants,
92 int width) { 105 int width) {
93 __m128i xmm0, xmm1, xmm2, xmm4; 106 __m128i xmm0, xmm1, xmm2, xmm4;
94 const __m128i xmm5 = _mm_set1_epi8(-1); 107 const __m128i xmm5 = _mm_set1_epi8(-1);
95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
96 while (width > 0) { 109 while (width > 0) {
97 READYUV422 110 READYUV422
98 YUVTORGB(yuvconstants) 111 YUVTORGB(yuvconstants)
99 STOREARGB 112 STOREARGB
100 width -= 8; 113 width -= 8;
101 } 114 }
102 } 115 }
103 #endif 116 #endif
104 117
105 #if defined(HAS_I422TOABGRROW_SSSE3) 118 #if defined(HAS_I422TOABGRROW_SSSE3)
106 void I422ToABGRRow_SSSE3(const uint8* y_buf, 119 void I422ToABGRRow_SSSE3(const uint8* y_buf,
107 const uint8* u_buf, 120 const uint8* u_buf,
108 const uint8* v_buf, 121 const uint8* v_buf,
109 uint8* dst_argb, 122 uint8* dst_abgr,
110 struct YuvConstants* yuvconstants, 123 struct YuvConstants* yuvconstants,
111 int width) { 124 int width) {
112 __m128i xmm0, xmm1, xmm2, xmm4; 125 __m128i xmm0, xmm1, xmm2, xmm4;
113 const __m128i xmm5 = _mm_set1_epi8(-1); 126 const __m128i xmm5 = _mm_set1_epi8(-1);
114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
115 while (width > 0) { 128 while (width > 0) {
116 READYUV422 129 READYUV422
117 YUVTORGB(yuvconstants) 130 YUVTORGB(yuvconstants)
118 STOREABGR 131 STOREABGR
119 width -= 8; 132 width -= 8;
120 } 133 }
121 } 134 }
122 #endif 135 #endif
136
137 #if defined(HAS_I422ALPHATOARGBROW_SSSE3)
138 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
139 const uint8* u_buf,
140 const uint8* v_buf,
141 const uint8* a_buf,
142 uint8* dst_argb,
143 struct YuvConstants* yuvconstants,
144 int width) {
145 __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
146 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
147 while (width > 0) {
148 READYUVA422
149 YUVTORGB(yuvconstants)
150 STOREARGB
151 width -= 8;
152 }
153 }
154 #endif
155
156 #if defined(HAS_I422ALPHATOABGRROW_SSSE3)
157 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
158 const uint8* u_buf,
159 const uint8* v_buf,
160 const uint8* a_buf,
161 uint8* dst_abgr,
162 struct YuvConstants* yuvconstants,
163 int width) {
164 __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
165 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
166 while (width > 0) {
167 READYUVA422
168 YUVTORGB(yuvconstants)
169 STOREABGR
170 width -= 8;
171 }
172 }
173 #endif
174
123 // 32 bit 175 // 32 bit
124 #else // defined(_M_X64) 176 #else // defined(_M_X64)
125 #ifdef HAS_ARGBTOYROW_SSSE3 177 #ifdef HAS_ARGBTOYROW_SSSE3
126 178
127 // Constants for ARGB. 179 // Constants for ARGB.
128 static const vec8 kARGBToY = { 180 static const vec8 kARGBToY = {
129 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 181 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
130 }; 182 };
131 183
132 // JPEG full range. 184 // JPEG full range.
(...skipping 6519 matching lines...) Expand 10 before | Expand all | Expand 10 after
6652 } 6704 }
6653 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6705 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6654 6706
6655 #endif // defined(_M_X64) 6707 #endif // defined(_M_X64)
6656 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6708 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6657 6709
6658 #ifdef __cplusplus 6710 #ifdef __cplusplus
6659 } // extern "C" 6711 } // extern "C"
6660 } // namespace libyuv 6712 } // namespace libyuv
6661 #endif 6713 #endif
OLDNEW
« no previous file with comments | « source/row_common.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698