OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 19 matching lines...) |
30 | 30 |
31 // Read 4 UV from 422, upsample to 8 UV. Also read 8 Y. | 31 // Read 4 UV from 422, upsample to 8 UV. Also read 8 Y. |
32 #define READYUV422 \ | 32 #define READYUV422 \ |
33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ | 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ |
34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ | 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ |
35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ | 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ |
36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ | 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ |
37 u_buf += 4; \ | 37 u_buf += 4; \ |
38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ | 38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ |
39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ | 39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ |
| 40 y_buf += 8; |
| 41 |
| 42 // Read 4 UV from 422, upsample to 8 UV. Also read 8 Y and 8 alpha. |
| 43 #define READYUVA422 \ |
| 44 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ |
| 45 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ |
| 46 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ |
| 47 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ |
| 48 u_buf += 4; \ |
| 49 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ |
| 50 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ |
40 y_buf += 8; \ | 51 y_buf += 8; \ |
| 52 xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ |
| 53 a_buf += 8; |
41 | 54 |
42 // Convert 8 pixels: 8 UV and 8 Y. | 55 // Convert 8 pixels: 8 UV and 8 Y. |
43 #define YUVTORGB(yuvconstants) \ | 56 #define YUVTORGB(yuvconstants) \ |
44 xmm1 = _mm_loadu_si128(&xmm0); \ | 57 xmm1 = _mm_loadu_si128(&xmm0); \ |
45 xmm2 = _mm_loadu_si128(&xmm0); \ | 58 xmm2 = _mm_loadu_si128(&xmm0); \ |
46 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \ | 59 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \ |
47 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \ | 60 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \ |
48 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \ | 61 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \ |
49 xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \ | 62 xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \ |
50 xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \ | 63 xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \ |
(...skipping 20 matching lines...) |
71 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ | 84 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ |
72 dst_argb += 32; | 85 dst_argb += 32; |
73 | 86 |
74 // Store 8 ABGR values. | 87 // Store 8 ABGR values. |
75 #define STOREABGR \ | 88 #define STOREABGR \ |
76 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ | 89 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ |
77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \ | 90 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \ |
78 xmm1 = _mm_loadu_si128(&xmm2); \ | 91 xmm1 = _mm_loadu_si128(&xmm2); \ |
79 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \ | 92 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \ |
80 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \ | 93 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \ |
81 _mm_storeu_si128((__m128i *)dst_argb, xmm2); \ | 94 _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \ |
82 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ | 95 _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \ |
83 dst_argb += 32; | 96 dst_abgr += 32; |
84 | 97 |
85 | 98 |
86 #if defined(HAS_I422TOARGBROW_SSSE3) | 99 #if defined(HAS_I422TOARGBROW_SSSE3) |
87 void I422ToARGBRow_SSSE3(const uint8* y_buf, | 100 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
88 const uint8* u_buf, | 101 const uint8* u_buf, |
89 const uint8* v_buf, | 102 const uint8* v_buf, |
90 uint8* dst_argb, | 103 uint8* dst_argb, |
91 struct YuvConstants* yuvconstants, | 104 struct YuvConstants* yuvconstants, |
92 int width) { | 105 int width) { |
93 __m128i xmm0, xmm1, xmm2, xmm4; | 106 __m128i xmm0, xmm1, xmm2, xmm4; |
94 const __m128i xmm5 = _mm_set1_epi8(-1); | 107 const __m128i xmm5 = _mm_set1_epi8(-1); |
95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
96 while (width > 0) { | 109 while (width > 0) { |
97 READYUV422 | 110 READYUV422 |
98 YUVTORGB(yuvconstants) | 111 YUVTORGB(yuvconstants) |
99 STOREARGB | 112 STOREARGB |
100 width -= 8; | 113 width -= 8; |
101 } | 114 } |
102 } | 115 } |
103 #endif | 116 #endif |
104 | 117 |
105 #if defined(HAS_I422TOABGRROW_SSSE3) | 118 #if defined(HAS_I422TOABGRROW_SSSE3) |
106 void I422ToABGRRow_SSSE3(const uint8* y_buf, | 119 void I422ToABGRRow_SSSE3(const uint8* y_buf, |
107 const uint8* u_buf, | 120 const uint8* u_buf, |
108 const uint8* v_buf, | 121 const uint8* v_buf, |
109 uint8* dst_argb, | 122 uint8* dst_abgr, |
110 struct YuvConstants* yuvconstants, | 123 struct YuvConstants* yuvconstants, |
111 int width) { | 124 int width) { |
112 __m128i xmm0, xmm1, xmm2, xmm4; | 125 __m128i xmm0, xmm1, xmm2, xmm4; |
113 const __m128i xmm5 = _mm_set1_epi8(-1); | 126 const __m128i xmm5 = _mm_set1_epi8(-1); |
114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
115 while (width > 0) { | 128 while (width > 0) { |
116 READYUV422 | 129 READYUV422 |
117 YUVTORGB(yuvconstants) | 130 YUVTORGB(yuvconstants) |
118 STOREABGR | 131 STOREABGR |
119 width -= 8; | 132 width -= 8; |
120 } | 133 } |
121 } | 134 } |
122 #endif | 135 #endif |
| 136 |
| 137 #if defined(HAS_I422ALPHATOARGBROW_SSSE3) |
| 138 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
| 139 const uint8* u_buf, |
| 140 const uint8* v_buf, |
| 141 const uint8* a_buf, |
| 142 uint8* dst_argb, |
| 143 struct YuvConstants* yuvconstants, |
| 144 int width) { |
| 145 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; |
| 146 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
| 147 while (width > 0) { |
| 148 READYUVA422 |
| 149 YUVTORGB(yuvconstants) |
| 150 STOREARGB |
| 151 width -= 8; |
| 152 } |
| 153 } |
| 154 #endif |
| 155 |
| 156 #if defined(HAS_I422ALPHATOABGRROW_SSSE3) |
| 157 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, |
| 158 const uint8* u_buf, |
| 159 const uint8* v_buf, |
| 160 const uint8* a_buf, |
| 161 uint8* dst_abgr, |
| 162 struct YuvConstants* yuvconstants, |
| 163 int width) { |
| 164 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; |
| 165 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
| 166 while (width > 0) { |
| 167 READYUVA422 |
| 168 YUVTORGB(yuvconstants) |
| 169 STOREABGR |
| 170 width -= 8; |
| 171 } |
| 172 } |
| 173 #endif |
| 174 |
123 // 32 bit | 175 // 32 bit |
124 #else // defined(_M_X64) | 176 #else // defined(_M_X64) |
125 #ifdef HAS_ARGBTOYROW_SSSE3 | 177 #ifdef HAS_ARGBTOYROW_SSSE3 |
126 | 178 |
127 // Constants for ARGB. | 179 // Constants for ARGB. |
128 static const vec8 kARGBToY = { | 180 static const vec8 kARGBToY = { |
129 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 | 181 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 |
130 }; | 182 }; |
131 | 183 |
132 // JPEG full range. | 184 // JPEG full range. |
(...skipping 6519 matching lines...) |
6652 } | 6704 } |
6653 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6705 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6654 | 6706 |
6655 #endif // defined(_M_X64) | 6707 #endif // defined(_M_X64) |
6656 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6708 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6657 | 6709 |
6658 #ifdef __cplusplus | 6710 #ifdef __cplusplus |
6659 } // extern "C" | 6711 } // extern "C" |
6660 } // namespace libyuv | 6712 } // namespace libyuv |
6661 #endif | 6713 #endif |
OLD | NEW |