Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(118)

Side by Side Diff: media/base/simd/convert_rgb_to_yuv_sse2.cc

Issue 1542013004: Switch to standard integer types in media/, take 2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: more stddef Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « media/base/simd/convert_rgb_to_yuv_c.cc ('k') | media/base/simd/convert_rgb_to_yuv_ssse3.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdint.h>
6
5 #include "build/build_config.h" 7 #include "build/build_config.h"
6 #include "media/base/simd/convert_rgb_to_yuv.h" 8 #include "media/base/simd/convert_rgb_to_yuv.h"
7 9
8 #if defined(COMPILER_MSVC) 10 #if defined(COMPILER_MSVC)
9 #include <intrin.h> 11 #include <intrin.h>
10 #else 12 #else
11 #include <mmintrin.h> 13 #include <mmintrin.h>
12 #include <emmintrin.h> 14 #include <emmintrin.h>
13 #endif 15 #endif
14 16
15 #if defined(COMPILER_MSVC) 17 #if defined(COMPILER_MSVC)
16 #define SIMD_ALIGNED(var) __declspec(align(16)) var 18 #define SIMD_ALIGNED(var) __declspec(align(16)) var
17 #else 19 #else
18 #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) 20 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
19 #endif 21 #endif
20 22
21 namespace media { 23 namespace media {
22 24
23 #define FIX_SHIFT 12 25 #define FIX_SHIFT 12
24 #define FIX(x) ((x) * (1 << FIX_SHIFT)) 26 #define FIX(x) ((x) * (1 << FIX_SHIFT))
25 27
26 // Define a convenient macro to do static cast. 28 // Define a convenient macro to do static cast.
27 #define INT16_FIX(x) static_cast<int16>(FIX(x)) 29 #define INT16_FIX(x) static_cast<int16_t>(FIX(x))
28 30
29 // Android's pixel layout is RGBA, while other platforms 31 // Android's pixel layout is RGBA, while other platforms
30 // are BGRA. 32 // are BGRA.
31 #if defined(OS_ANDROID) 33 #if defined(OS_ANDROID)
32 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { 34 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {
33 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, 35 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,
34 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, 36 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,
35 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, 37 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,
36 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, 38 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,
37 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, 39 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,
38 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, 40 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,
39 }; 41 };
40 #else 42 #else
41 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { 43 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {
42 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, 44 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,
43 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, 45 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,
44 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, 46 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,
45 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, 47 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,
46 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, 48 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,
47 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, 49 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,
48 }; 50 };
49 #endif 51 #endif
50 52
51 #undef INT16_FIX 53 #undef INT16_FIX
52 54
53 // This is the final offset for the conversion from signed yuv values to 55 // This is the final offset for the conversion from signed yuv values to
54 // unsigned values. It is arranged so that offset of 16 is applied to Y 56 // unsigned values. It is arranged so that offset of 16 is applied to Y
55 // components and 128 is added to UV components for 2 pixels. 57 // components and 128 is added to UV components for 2 pixels.
56 SIMD_ALIGNED(const int32 kYOffset[4]) = {16, 16, 16, 16}; 58 SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16};
57 59
58 static inline uint8 Clamp(int value) { 60 static inline uint8_t Clamp(int value) {
59 if (value < 0) 61 if (value < 0)
60 return 0; 62 return 0;
61 if (value > 255) 63 if (value > 255)
62 return 255; 64 return 255;
63 return static_cast<uint8>(value); 65 return static_cast<uint8_t>(value);
64 } 66 }
65 67
66 static inline uint8 RGBToY(int r, int g, int b) { 68 static inline uint8_t RGBToY(int r, int g, int b) {
67 int y = ConvertRGBAToYUV_kTable[0] * b + 69 int y = ConvertRGBAToYUV_kTable[0] * b +
68 ConvertRGBAToYUV_kTable[1] * g + 70 ConvertRGBAToYUV_kTable[1] * g +
69 ConvertRGBAToYUV_kTable[2] * r; 71 ConvertRGBAToYUV_kTable[2] * r;
70 y >>= FIX_SHIFT; 72 y >>= FIX_SHIFT;
71 return Clamp(y + 16); 73 return Clamp(y + 16);
72 } 74 }
73 75
74 static inline uint8 RGBToU(int r, int g, int b, int shift) { 76 static inline uint8_t RGBToU(int r, int g, int b, int shift) {
75 int u = ConvertRGBAToYUV_kTable[8] * b + 77 int u = ConvertRGBAToYUV_kTable[8] * b +
76 ConvertRGBAToYUV_kTable[9] * g + 78 ConvertRGBAToYUV_kTable[9] * g +
77 ConvertRGBAToYUV_kTable[10] * r; 79 ConvertRGBAToYUV_kTable[10] * r;
78 u >>= FIX_SHIFT + shift; 80 u >>= FIX_SHIFT + shift;
79 return Clamp(u + 128); 81 return Clamp(u + 128);
80 } 82 }
81 83
82 static inline uint8 RGBToV(int r, int g, int b, int shift) { 84 static inline uint8_t RGBToV(int r, int g, int b, int shift) {
83 int v = ConvertRGBAToYUV_kTable[16] * b + 85 int v = ConvertRGBAToYUV_kTable[16] * b +
84 ConvertRGBAToYUV_kTable[17] * g + 86 ConvertRGBAToYUV_kTable[17] * g +
85 ConvertRGBAToYUV_kTable[18] * r; 87 ConvertRGBAToYUV_kTable[18] * r;
86 v >>= FIX_SHIFT + shift; 88 v >>= FIX_SHIFT + shift;
87 return Clamp(v + 128); 89 return Clamp(v + 128);
88 } 90 }
89 91
90 #define CONVERT_Y(rgb_buf, y_buf) \ 92 #define CONVERT_Y(rgb_buf, y_buf) \
91 b = *rgb_buf++; \ 93 b = *rgb_buf++; \
92 g = *rgb_buf++; \ 94 g = *rgb_buf++; \
93 r = *rgb_buf++; \ 95 r = *rgb_buf++; \
94 ++rgb_buf; \ 96 ++rgb_buf; \
95 sum_b += b; \ 97 sum_b += b; \
96 sum_g += g; \ 98 sum_g += g; \
97 sum_r += r; \ 99 sum_r += r; \
98 *y_buf++ = RGBToY(r, g, b); 100 *y_buf++ = RGBToY(r, g, b);
99 101
100 static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1, 102 static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1,
101 const uint8* rgb_buf_2, 103 const uint8_t* rgb_buf_2,
102 uint8* y_buf_1, 104 uint8_t* y_buf_1,
103 uint8* y_buf_2, 105 uint8_t* y_buf_2,
104 uint8* u_buf, 106 uint8_t* u_buf,
105 uint8* v_buf) { 107 uint8_t* v_buf) {
106 int sum_b = 0; 108 int sum_b = 0;
107 int sum_g = 0; 109 int sum_g = 0;
108 int sum_r = 0; 110 int sum_r = 0;
109 int r, g, b; 111 int r, g, b;
110 112
111 113
112 114
113 CONVERT_Y(rgb_buf_1, y_buf_1); 115 CONVERT_Y(rgb_buf_1, y_buf_1);
114 CONVERT_Y(rgb_buf_1, y_buf_1); 116 CONVERT_Y(rgb_buf_1, y_buf_1);
115 CONVERT_Y(rgb_buf_2, y_buf_2); 117 CONVERT_Y(rgb_buf_2, y_buf_2);
116 CONVERT_Y(rgb_buf_2, y_buf_2); 118 CONVERT_Y(rgb_buf_2, y_buf_2);
117 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); 119 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2);
118 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); 120 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2);
119 } 121 }
120 122
121 static inline void ConvertRGBToYUV_V2H1(const uint8* rgb_buf_1, 123 static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1,
122 const uint8* rgb_buf_2, 124 const uint8_t* rgb_buf_2,
123 uint8* y_buf_1, 125 uint8_t* y_buf_1,
124 uint8* y_buf_2, 126 uint8_t* y_buf_2,
125 uint8* u_buf, 127 uint8_t* u_buf,
126 uint8* v_buf) { 128 uint8_t* v_buf) {
127 int sum_b = 0; 129 int sum_b = 0;
128 int sum_g = 0; 130 int sum_g = 0;
129 int sum_r = 0; 131 int sum_r = 0;
130 int r, g, b; 132 int r, g, b;
131 133
132 CONVERT_Y(rgb_buf_1, y_buf_1); 134 CONVERT_Y(rgb_buf_1, y_buf_1);
133 CONVERT_Y(rgb_buf_2, y_buf_2); 135 CONVERT_Y(rgb_buf_2, y_buf_2);
134 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); 136 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);
135 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); 137 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);
136 } 138 }
137 139
138 static inline void ConvertRGBToYUV_V1H2(const uint8* rgb_buf, 140 static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf,
139 uint8* y_buf, 141 uint8_t* y_buf,
140 uint8* u_buf, 142 uint8_t* u_buf,
141 uint8* v_buf) { 143 uint8_t* v_buf) {
142 int sum_b = 0; 144 int sum_b = 0;
143 int sum_g = 0; 145 int sum_g = 0;
144 int sum_r = 0; 146 int sum_r = 0;
145 int r, g, b; 147 int r, g, b;
146 148
147 CONVERT_Y(rgb_buf, y_buf); 149 CONVERT_Y(rgb_buf, y_buf);
148 CONVERT_Y(rgb_buf, y_buf); 150 CONVERT_Y(rgb_buf, y_buf);
149 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); 151 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);
150 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); 152 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);
151 } 153 }
152 154
153 static inline void ConvertRGBToYUV_V1H1(const uint8* rgb_buf, 155 static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf,
154 uint8* y_buf, 156 uint8_t* y_buf,
155 uint8* u_buf, 157 uint8_t* u_buf,
156 uint8* v_buf) { 158 uint8_t* v_buf) {
157 int sum_b = 0; 159 int sum_b = 0;
158 int sum_g = 0; 160 int sum_g = 0;
159 int sum_r = 0; 161 int sum_r = 0;
160 int r, g, b; 162 int r, g, b;
161 163
162 CONVERT_Y(rgb_buf, y_buf); 164 CONVERT_Y(rgb_buf, y_buf);
163 *u_buf++ = RGBToU(r, g, b, 0); 165 *u_buf++ = RGBToU(r, g, b, 0);
164 *v_buf++ = RGBToV(r, g, b, 0); 166 *v_buf++ = RGBToV(r, g, b, 0);
165 } 167 }
166 168
167 static void ConvertRGB32ToYUVRow_SSE2(const uint8* rgb_buf_1, 169 static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1,
168 const uint8* rgb_buf_2, 170 const uint8_t* rgb_buf_2,
169 uint8* y_buf_1, 171 uint8_t* y_buf_1,
170 uint8* y_buf_2, 172 uint8_t* y_buf_2,
171 uint8* u_buf, 173 uint8_t* u_buf,
172 uint8* v_buf, 174 uint8_t* v_buf,
173 int width) { 175 int width) {
174 while (width >= 4) { 176 while (width >= 4) {
175 // Name for the Y pixels: 177 // Name for the Y pixels:
176 // Row 1: a b c d 178 // Row 1: a b c d
177 // Row 2: e f g h 179 // Row 2: e f g h
178 // 180 //
179 // First row 4 pixels. 181 // First row 4 pixels.
180 __m128i rgb_row_1 = _mm_loadu_si128( 182 __m128i rgb_row_1 = _mm_loadu_si128(
181 reinterpret_cast<const __m128i*>(rgb_buf_1)); 183 reinterpret_cast<const __m128i*>(rgb_buf_1));
182 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); 184 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1);
(...skipping 23 matching lines...) Expand all
206 (2 << 6) | (2 << 2))); 208 (2 << 6) | (2 << 2)));
207 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); 209 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd);
208 210
209 // Down shift back to 8bits range. 211 // Down shift back to 8bits range.
210 __m128i y_offset = _mm_load_si128( 212 __m128i y_offset = _mm_load_si128(
211 reinterpret_cast<const __m128i*>(kYOffset)); 213 reinterpret_cast<const __m128i*>(kYOffset));
212 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); 214 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT);
213 y_abcd = _mm_add_epi32(y_abcd, y_offset); 215 y_abcd = _mm_add_epi32(y_abcd, y_offset);
214 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); 216 y_abcd = _mm_packs_epi32(y_abcd, y_abcd);
215 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); 217 y_abcd = _mm_packus_epi16(y_abcd, y_abcd);
216 *reinterpret_cast<uint32*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); 218 *reinterpret_cast<uint32_t*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd);
217 y_buf_1 += 4; 219 y_buf_1 += 4;
218 220
219 // Second row 4 pixels. 221 // Second row 4 pixels.
220 __m128i rgb_row_2 = _mm_loadu_si128( 222 __m128i rgb_row_2 = _mm_loadu_si128(
221 reinterpret_cast<const __m128i*>(rgb_buf_2)); 223 reinterpret_cast<const __m128i*>(rgb_buf_2));
222 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); 224 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2);
223 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); 225 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2);
224 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); 226 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2);
225 227
226 // Add two rows together. 228 // Add two rows together.
(...skipping 12 matching lines...) Expand all
239 (3 << 6) | (1 << 4) | (3 << 2) | 1)); 241 (3 << 6) | (1 << 4) | (3 << 2) | 1));
240 __m128i r_efgh = _mm_castps_si128( 242 __m128i r_efgh = _mm_castps_si128(
241 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), 243 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),
242 _mm_castsi128_ps(rgb_e_f), 244 _mm_castsi128_ps(rgb_e_f),
243 (2 << 6) | (2 << 2))); 245 (2 << 6) | (2 << 2)));
244 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); 246 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh);
245 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); 247 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT);
246 y_efgh = _mm_add_epi32(y_efgh, y_offset); 248 y_efgh = _mm_add_epi32(y_efgh, y_offset);
247 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); 249 y_efgh = _mm_packs_epi32(y_efgh, y_efgh);
248 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); 250 y_efgh = _mm_packus_epi16(y_efgh, y_efgh);
249 *reinterpret_cast<uint32*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); 251 *reinterpret_cast<uint32_t*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh);
250 y_buf_2 += 4; 252 y_buf_2 += 4;
251 253
252 __m128i rgb_ae_cg = _mm_castps_si128( 254 __m128i rgb_ae_cg = _mm_castps_si128(
253 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), 255 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),
254 _mm_castsi128_ps(rgb_ae_bf), 256 _mm_castsi128_ps(rgb_ae_bf),
255 (3 << 6) | (2 << 4) | (3 << 2) | 2)); 257 (3 << 6) | (2 << 4) | (3 << 2) | 2));
256 __m128i rgb_bf_dh = _mm_castps_si128( 258 __m128i rgb_bf_dh = _mm_castps_si128(
257 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), 259 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),
258 _mm_castsi128_ps(rgb_ae_bf), 260 _mm_castsi128_ps(rgb_ae_bf),
259 (1 << 6) | (1 << 2))); 261 (1 << 6) | (1 << 2)));
260 262
261 // This is a 2x2 subsampling for 2 pixels. 263 // This is a 2x2 subsampling for 2 pixels.
262 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); 264 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh);
263 265
264 // Do a multiply add with U table. 266 // Do a multiply add with U table.
265 __m128i u_a_b = _mm_madd_epi16( 267 __m128i u_a_b = _mm_madd_epi16(
266 rgb_abef_cdgh, 268 rgb_abef_cdgh,
267 _mm_load_si128( 269 _mm_load_si128(
268 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); 270 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8)));
269 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), 271 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)),
270 _mm_shuffle_epi32(u_a_b, (2 << 2))); 272 _mm_shuffle_epi32(u_a_b, (2 << 2)));
271 // Right shift 14 because of 12 from fixed point and 2 from subsampling. 273 // Right shift 14 because of 12 from fixed point and 2 from subsampling.
272 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); 274 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2);
273 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); 275 __m128i uv_offset = _mm_slli_epi32(y_offset, 3);
274 u_a_b = _mm_add_epi32(u_a_b, uv_offset); 276 u_a_b = _mm_add_epi32(u_a_b, uv_offset);
275 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); 277 u_a_b = _mm_packs_epi32(u_a_b, u_a_b);
276 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); 278 u_a_b = _mm_packus_epi16(u_a_b, u_a_b);
277 *reinterpret_cast<uint16*>(u_buf) = 279 *reinterpret_cast<uint16_t*>(u_buf) =
278 static_cast<uint16>(_mm_extract_epi16(u_a_b, 0)); 280 static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0));
279 u_buf += 2; 281 u_buf += 2;
280 282
281 __m128i v_a_b = _mm_madd_epi16( 283 __m128i v_a_b = _mm_madd_epi16(
282 rgb_abef_cdgh, 284 rgb_abef_cdgh,
283 _mm_load_si128( 285 _mm_load_si128(
284 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); 286 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16)));
285 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), 287 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)),
286 _mm_shuffle_epi32(v_a_b, (2 << 2))); 288 _mm_shuffle_epi32(v_a_b, (2 << 2)));
287 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); 289 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2);
288 v_a_b = _mm_add_epi32(v_a_b, uv_offset); 290 v_a_b = _mm_add_epi32(v_a_b, uv_offset);
289 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); 291 v_a_b = _mm_packs_epi32(v_a_b, v_a_b);
290 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); 292 v_a_b = _mm_packus_epi16(v_a_b, v_a_b);
291 *reinterpret_cast<uint16*>(v_buf) = 293 *reinterpret_cast<uint16_t*>(v_buf) =
292 static_cast<uint16>(_mm_extract_epi16(v_a_b, 0)); 294 static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0));
293 v_buf += 2; 295 v_buf += 2;
294 296
295 rgb_buf_1 += 16; 297 rgb_buf_1 += 16;
296 rgb_buf_2 += 16; 298 rgb_buf_2 += 16;
297 299
298 // Move forward by 4 pixels. 300 // Move forward by 4 pixels.
299 width -= 4; 301 width -= 4;
300 } 302 }
301 303
302 // Just use C code to convert the remaining pixels. 304 // Just use C code to convert the remaining pixels.
303 if (width >= 2) { 305 if (width >= 2) {
304 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); 306 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);
305 rgb_buf_1 += 8; 307 rgb_buf_1 += 8;
306 rgb_buf_2 += 8; 308 rgb_buf_2 += 8;
307 y_buf_1 += 2; 309 y_buf_1 += 2;
308 y_buf_2 += 2; 310 y_buf_2 += 2;
309 ++u_buf; 311 ++u_buf;
310 ++v_buf; 312 ++v_buf;
311 width -= 2; 313 width -= 2;
312 } 314 }
313 315
314 if (width) 316 if (width)
315 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); 317 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);
316 } 318 }
317 319
318 extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, 320 extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe,
319 uint8* yplane, 321 uint8_t* yplane,
320 uint8* uplane, 322 uint8_t* uplane,
321 uint8* vplane, 323 uint8_t* vplane,
322 int width, 324 int width,
323 int height, 325 int height,
324 int rgbstride, 326 int rgbstride,
325 int ystride, 327 int ystride,
326 int uvstride) { 328 int uvstride) {
327 while (height >= 2) { 329 while (height >= 2) {
328 ConvertRGB32ToYUVRow_SSE2(rgbframe, 330 ConvertRGB32ToYUVRow_SSE2(rgbframe,
329 rgbframe + rgbstride, 331 rgbframe + rgbstride,
330 yplane, 332 yplane,
331 yplane + ystride, 333 yplane + ystride,
(...skipping 17 matching lines...) Expand all
349 yplane += 2; 351 yplane += 2;
350 ++uplane; 352 ++uplane;
351 ++vplane; 353 ++vplane;
352 width -= 2; 354 width -= 2;
353 } 355 }
354 356
355 if (width) 357 if (width)
356 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); 358 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);
357 } 359 }
358 360
359 void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe, 361 void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe,
360 uint8* yplane, 362 uint8_t* yplane,
361 uint8* uplane, 363 uint8_t* uplane,
362 uint8* vplane, 364 uint8_t* vplane,
363 int width, 365 int width,
364 int height, 366 int height,
365 int rgbstride, 367 int rgbstride,
366 int ystride, 368 int ystride,
367 int uvstride) { 369 int uvstride) {
368 while (height >= 2) { 370 while (height >= 2) {
369 int i = 0; 371 int i = 0;
370 372
371 // Convert a 2x2 block. 373 // Convert a 2x2 block.
372 while (i + 2 <= width) { 374 while (i + 2 <= width) {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
408 ++vplane; 410 ++vplane;
409 width -= 2; 411 width -= 2;
410 } 412 }
411 413
412 // Handle the last pixel in the last row. 414 // Handle the last pixel in the last row.
413 if (width) 415 if (width)
414 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); 416 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);
415 } 417 }
416 418
417 } // namespace media 419 } // namespace media
OLD | NEW
« no previous file with comments | « media/base/simd/convert_rgb_to_yuv_c.cc ('k') | media/base/simd/convert_rgb_to_yuv_ssse3.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698