media/base/simd/convert_rgb_to_yuv_sse2.cc - Issue 1542013004: Switch to standard integer types in media/, take 2.

Side by Side Diff: media/base/simd/convert_rgb_to_yuv_sse2.cc

Issue 1542013004: Switch to standard integer types in media/, take 2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: more stddef Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

	5 #include <stdint.h>

	6

5 #include "build/build_config.h"	7 #include "build/build_config.h"

6 #include "media/base/simd/convert_rgb_to_yuv.h"	8 #include "media/base/simd/convert_rgb_to_yuv.h"

7	9

8 #if defined(COMPILER_MSVC)	10 #if defined(COMPILER_MSVC)

9 #include <intrin.h>	11 #include <intrin.h>

10 #else	12 #else

11 #include <mmintrin.h>	13 #include <mmintrin.h>

12 #include <emmintrin.h>	14 #include <emmintrin.h>

13 #endif	15 #endif

14	16

15 #if defined(COMPILER_MSVC)	17 #if defined(COMPILER_MSVC)

16 #define SIMD_ALIGNED(var) __declspec(align(16)) var	18 #define SIMD_ALIGNED(var) __declspec(align(16)) var

17 #else	19 #else

18 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))	20 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))

19 #endif	21 #endif

20	22

21 namespace media {	23 namespace media {

22	24

23 #define FIX_SHIFT 12	25 #define FIX_SHIFT 12

24 #define FIX(x) ((x) * (1 << FIX_SHIFT))	26 #define FIX(x) ((x) * (1 << FIX_SHIFT))

25	27

26 // Define a convenient macro to do static cast.	28 // Define a convenient macro to do static cast.

27 #define INT16_FIX(x) static_cast<int16>(FIX(x))	29 #define INT16_FIX(x) static_cast<int16_t>(FIX(x))

28	30

29 // Android's pixel layout is RGBA, while other platforms	31 // Android's pixel layout is RGBA, while other platforms

30 // are BGRA.	32 // are BGRA.

31 #if defined(OS_ANDROID)	33 #if defined(OS_ANDROID)

32 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = {	34 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {

33 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,	35 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,

34 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,	36 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,

35 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,	37 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,

36 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,	38 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,

37 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,	39 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,

38 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,	40 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,

39 };	41 };

40 #else	42 #else

41 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = {	43 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {

42 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,	44 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,

43 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,	45 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,

44 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,	46 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,

45 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,	47 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,

46 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,	48 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,

47 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,	49 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,

48 };	50 };

49 #endif	51 #endif

50	52

51 #undef INT16_FIX	53 #undef INT16_FIX

52	54

53 // This is the final offset for the conversion from signed yuv values to	55 // This is the final offset for the conversion from signed yuv values to

54 // unsigned values. It is arranged so that offset of 16 is applied to Y	56 // unsigned values. It is arranged so that offset of 16 is applied to Y

55 // components and 128 is added to UV components for 2 pixels.	57 // components and 128 is added to UV components for 2 pixels.

56 SIMD_ALIGNED(const int32 kYOffset[4]) = {16, 16, 16, 16};	58 SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16};

57	59

58 static inline uint8 Clamp(int value) {	60 static inline uint8_t Clamp(int value) {

59 if (value < 0)	61 if (value < 0)

60 return 0;	62 return 0;

61 if (value > 255)	63 if (value > 255)

62 return 255;	64 return 255;

63 return static_cast<uint8>(value);	65 return static_cast<uint8_t>(value);

64 }	66 }

65	67

66 static inline uint8 RGBToY(int r, int g, int b) {	68 static inline uint8_t RGBToY(int r, int g, int b) {

67 int y = ConvertRGBAToYUV_kTable[0] * b +	69 int y = ConvertRGBAToYUV_kTable[0] * b +

68 ConvertRGBAToYUV_kTable[1] * g +	70 ConvertRGBAToYUV_kTable[1] * g +

69 ConvertRGBAToYUV_kTable[2] * r;	71 ConvertRGBAToYUV_kTable[2] * r;

70 y >>= FIX_SHIFT;	72 y >>= FIX_SHIFT;

71 return Clamp(y + 16);	73 return Clamp(y + 16);

72 }	74 }

73	75

74 static inline uint8 RGBToU(int r, int g, int b, int shift) {	76 static inline uint8_t RGBToU(int r, int g, int b, int shift) {

75 int u = ConvertRGBAToYUV_kTable[8] * b +	77 int u = ConvertRGBAToYUV_kTable[8] * b +

76 ConvertRGBAToYUV_kTable[9] * g +	78 ConvertRGBAToYUV_kTable[9] * g +

77 ConvertRGBAToYUV_kTable[10] * r;	79 ConvertRGBAToYUV_kTable[10] * r;

78 u >>= FIX_SHIFT + shift;	80 u >>= FIX_SHIFT + shift;

79 return Clamp(u + 128);	81 return Clamp(u + 128);

80 }	82 }

81	83

82 static inline uint8 RGBToV(int r, int g, int b, int shift) {	84 static inline uint8_t RGBToV(int r, int g, int b, int shift) {

83 int v = ConvertRGBAToYUV_kTable[16] * b +	85 int v = ConvertRGBAToYUV_kTable[16] * b +

84 ConvertRGBAToYUV_kTable[17] * g +	86 ConvertRGBAToYUV_kTable[17] * g +

85 ConvertRGBAToYUV_kTable[18] * r;	87 ConvertRGBAToYUV_kTable[18] * r;

86 v >>= FIX_SHIFT + shift;	88 v >>= FIX_SHIFT + shift;

87 return Clamp(v + 128);	89 return Clamp(v + 128);

88 }	90 }

89	91

90 #define CONVERT_Y(rgb_buf, y_buf) \	92 #define CONVERT_Y(rgb_buf, y_buf) \

91 b = *rgb_buf++; \	93 b = *rgb_buf++; \

92 g = *rgb_buf++; \	94 g = *rgb_buf++; \

93 r = *rgb_buf++; \	95 r = *rgb_buf++; \

94 ++rgb_buf; \	96 ++rgb_buf; \

95 sum_b += b; \	97 sum_b += b; \

96 sum_g += g; \	98 sum_g += g; \

97 sum_r += r; \	99 sum_r += r; \

98 *y_buf++ = RGBToY(r, g, b);	100 *y_buf++ = RGBToY(r, g, b);

99	101

100 static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1,	102 static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1,

101 const uint8* rgb_buf_2,	103 const uint8_t* rgb_buf_2,

102 uint8* y_buf_1,	104 uint8_t* y_buf_1,

103 uint8* y_buf_2,	105 uint8_t* y_buf_2,

104 uint8* u_buf,	106 uint8_t* u_buf,

105 uint8* v_buf) {	107 uint8_t* v_buf) {

106 int sum_b = 0;	108 int sum_b = 0;

107 int sum_g = 0;	109 int sum_g = 0;

108 int sum_r = 0;	110 int sum_r = 0;

109 int r, g, b;	111 int r, g, b;

110	112

111	113

112	114

113 CONVERT_Y(rgb_buf_1, y_buf_1);	115 CONVERT_Y(rgb_buf_1, y_buf_1);

114 CONVERT_Y(rgb_buf_1, y_buf_1);	116 CONVERT_Y(rgb_buf_1, y_buf_1);

115 CONVERT_Y(rgb_buf_2, y_buf_2);	117 CONVERT_Y(rgb_buf_2, y_buf_2);

116 CONVERT_Y(rgb_buf_2, y_buf_2);	118 CONVERT_Y(rgb_buf_2, y_buf_2);

117 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2);	119 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2);

118 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2);	120 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2);

119 }	121 }

120	122

121 static inline void ConvertRGBToYUV_V2H1(const uint8* rgb_buf_1,	123 static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1,

122 const uint8* rgb_buf_2,	124 const uint8_t* rgb_buf_2,

123 uint8* y_buf_1,	125 uint8_t* y_buf_1,

124 uint8* y_buf_2,	126 uint8_t* y_buf_2,

125 uint8* u_buf,	127 uint8_t* u_buf,

126 uint8* v_buf) {	128 uint8_t* v_buf) {

127 int sum_b = 0;	129 int sum_b = 0;

128 int sum_g = 0;	130 int sum_g = 0;

129 int sum_r = 0;	131 int sum_r = 0;

130 int r, g, b;	132 int r, g, b;

131	133

132 CONVERT_Y(rgb_buf_1, y_buf_1);	134 CONVERT_Y(rgb_buf_1, y_buf_1);

133 CONVERT_Y(rgb_buf_2, y_buf_2);	135 CONVERT_Y(rgb_buf_2, y_buf_2);

134 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);	136 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);

135 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);	137 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);

136 }	138 }

137	139

138 static inline void ConvertRGBToYUV_V1H2(const uint8* rgb_buf,	140 static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf,

139 uint8* y_buf,	141 uint8_t* y_buf,

140 uint8* u_buf,	142 uint8_t* u_buf,

141 uint8* v_buf) {	143 uint8_t* v_buf) {

142 int sum_b = 0;	144 int sum_b = 0;

143 int sum_g = 0;	145 int sum_g = 0;

144 int sum_r = 0;	146 int sum_r = 0;

145 int r, g, b;	147 int r, g, b;

146	148

147 CONVERT_Y(rgb_buf, y_buf);	149 CONVERT_Y(rgb_buf, y_buf);

148 CONVERT_Y(rgb_buf, y_buf);	150 CONVERT_Y(rgb_buf, y_buf);

149 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);	151 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);

150 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);	152 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);

151 }	153 }

152	154

153 static inline void ConvertRGBToYUV_V1H1(const uint8* rgb_buf,	155 static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf,

154 uint8* y_buf,	156 uint8_t* y_buf,

155 uint8* u_buf,	157 uint8_t* u_buf,

156 uint8* v_buf) {	158 uint8_t* v_buf) {

157 int sum_b = 0;	159 int sum_b = 0;

158 int sum_g = 0;	160 int sum_g = 0;

159 int sum_r = 0;	161 int sum_r = 0;

160 int r, g, b;	162 int r, g, b;

161	163

162 CONVERT_Y(rgb_buf, y_buf);	164 CONVERT_Y(rgb_buf, y_buf);

163 *u_buf++ = RGBToU(r, g, b, 0);	165 *u_buf++ = RGBToU(r, g, b, 0);

164 *v_buf++ = RGBToV(r, g, b, 0);	166 *v_buf++ = RGBToV(r, g, b, 0);

165 }	167 }

166	168

167 static void ConvertRGB32ToYUVRow_SSE2(const uint8* rgb_buf_1,	169 static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1,

168 const uint8* rgb_buf_2,	170 const uint8_t* rgb_buf_2,

169 uint8* y_buf_1,	171 uint8_t* y_buf_1,

170 uint8* y_buf_2,	172 uint8_t* y_buf_2,

171 uint8* u_buf,	173 uint8_t* u_buf,

172 uint8* v_buf,	174 uint8_t* v_buf,

173 int width) {	175 int width) {

174 while (width >= 4) {	176 while (width >= 4) {

175 // Name for the Y pixels:	177 // Name for the Y pixels:

176 // Row 1: a b c d	178 // Row 1: a b c d

177 // Row 2: e f g h	179 // Row 2: e f g h

178 //	180 //

179 // First row 4 pixels.	181 // First row 4 pixels.

180 __m128i rgb_row_1 = _mm_loadu_si128(	182 __m128i rgb_row_1 = _mm_loadu_si128(

181 reinterpret_cast<const __m128i*>(rgb_buf_1));	183 reinterpret_cast<const __m128i*>(rgb_buf_1));

182 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1);	184 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1);

(...skipping 23 matching lines...) Expand all Loading...
206 (2 << 6) | (2 << 2)));	208 (2 << 6) | (2 << 2)));

207 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd);	209 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd);

208	210

209 // Down shift back to 8bits range.	211 // Down shift back to 8bits range.

210 __m128i y_offset = _mm_load_si128(	212 __m128i y_offset = _mm_load_si128(

211 reinterpret_cast<const __m128i*>(kYOffset));	213 reinterpret_cast<const __m128i*>(kYOffset));

212 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT);	214 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT);

213 y_abcd = _mm_add_epi32(y_abcd, y_offset);	215 y_abcd = _mm_add_epi32(y_abcd, y_offset);

214 y_abcd = _mm_packs_epi32(y_abcd, y_abcd);	216 y_abcd = _mm_packs_epi32(y_abcd, y_abcd);

215 y_abcd = _mm_packus_epi16(y_abcd, y_abcd);	217 y_abcd = _mm_packus_epi16(y_abcd, y_abcd);

216 *reinterpret_cast<uint32*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd);	218 *reinterpret_cast<uint32_t*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd);

217 y_buf_1 += 4;	219 y_buf_1 += 4;

218	220

219 // Second row 4 pixels.	221 // Second row 4 pixels.

220 __m128i rgb_row_2 = _mm_loadu_si128(	222 __m128i rgb_row_2 = _mm_loadu_si128(

221 reinterpret_cast<const __m128i*>(rgb_buf_2));	223 reinterpret_cast<const __m128i*>(rgb_buf_2));

222 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2);	224 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2);

223 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2);	225 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2);

224 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2);	226 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2);

225	227

226 // Add two rows together.	228 // Add two rows together.

(...skipping 12 matching lines...) Expand all Loading...
239 (3 << 6) | (1 << 4) | (3 << 2) | 1));	241 (3 << 6) | (1 << 4) | (3 << 2) | 1));

240 __m128i r_efgh = _mm_castps_si128(	242 __m128i r_efgh = _mm_castps_si128(

241 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),	243 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),

242 _mm_castsi128_ps(rgb_e_f),	244 _mm_castsi128_ps(rgb_e_f),

243 (2 << 6) | (2 << 2)));	245 (2 << 6) | (2 << 2)));

244 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh);	246 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh);

245 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT);	247 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT);

246 y_efgh = _mm_add_epi32(y_efgh, y_offset);	248 y_efgh = _mm_add_epi32(y_efgh, y_offset);

247 y_efgh = _mm_packs_epi32(y_efgh, y_efgh);	249 y_efgh = _mm_packs_epi32(y_efgh, y_efgh);

248 y_efgh = _mm_packus_epi16(y_efgh, y_efgh);	250 y_efgh = _mm_packus_epi16(y_efgh, y_efgh);

249 *reinterpret_cast<uint32*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh);	251 *reinterpret_cast<uint32_t*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh);

250 y_buf_2 += 4;	252 y_buf_2 += 4;

251	253

252 __m128i rgb_ae_cg = _mm_castps_si128(	254 __m128i rgb_ae_cg = _mm_castps_si128(

253 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),	255 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),

254 _mm_castsi128_ps(rgb_ae_bf),	256 _mm_castsi128_ps(rgb_ae_bf),

255 (3 << 6) | (2 << 4) | (3 << 2) | 2));	257 (3 << 6) | (2 << 4) | (3 << 2) | 2));

256 __m128i rgb_bf_dh = _mm_castps_si128(	258 __m128i rgb_bf_dh = _mm_castps_si128(

257 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),	259 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),

258 _mm_castsi128_ps(rgb_ae_bf),	260 _mm_castsi128_ps(rgb_ae_bf),

259 (1 << 6) | (1 << 2)));	261 (1 << 6) | (1 << 2)));

260	262

261 // This is a 2x2 subsampling for 2 pixels.	263 // This is a 2x2 subsampling for 2 pixels.

262 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh);	264 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh);

263	265

264 // Do a multiply add with U table.	266 // Do a multiply add with U table.

265 __m128i u_a_b = _mm_madd_epi16(	267 __m128i u_a_b = _mm_madd_epi16(

266 rgb_abef_cdgh,	268 rgb_abef_cdgh,

267 _mm_load_si128(	269 _mm_load_si128(

268 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8)));	270 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8)));

269 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)),	271 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)),

270 _mm_shuffle_epi32(u_a_b, (2 << 2)));	272 _mm_shuffle_epi32(u_a_b, (2 << 2)));

271 // Right shift 14 because of 12 from fixed point and 2 from subsampling.	273 // Right shift 14 because of 12 from fixed point and 2 from subsampling.

272 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2);	274 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2);

273 __m128i uv_offset = _mm_slli_epi32(y_offset, 3);	275 __m128i uv_offset = _mm_slli_epi32(y_offset, 3);

274 u_a_b = _mm_add_epi32(u_a_b, uv_offset);	276 u_a_b = _mm_add_epi32(u_a_b, uv_offset);

275 u_a_b = _mm_packs_epi32(u_a_b, u_a_b);	277 u_a_b = _mm_packs_epi32(u_a_b, u_a_b);

276 u_a_b = _mm_packus_epi16(u_a_b, u_a_b);	278 u_a_b = _mm_packus_epi16(u_a_b, u_a_b);

277 *reinterpret_cast<uint16*>(u_buf) =	279 *reinterpret_cast<uint16_t*>(u_buf) =

278 static_cast<uint16>(_mm_extract_epi16(u_a_b, 0));	280 static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0));

279 u_buf += 2;	281 u_buf += 2;

280	282

281 __m128i v_a_b = _mm_madd_epi16(	283 __m128i v_a_b = _mm_madd_epi16(

282 rgb_abef_cdgh,	284 rgb_abef_cdgh,

283 _mm_load_si128(	285 _mm_load_si128(

284 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16)));	286 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16)));

285 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)),	287 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)),

286 _mm_shuffle_epi32(v_a_b, (2 << 2)));	288 _mm_shuffle_epi32(v_a_b, (2 << 2)));

287 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2);	289 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2);

288 v_a_b = _mm_add_epi32(v_a_b, uv_offset);	290 v_a_b = _mm_add_epi32(v_a_b, uv_offset);

289 v_a_b = _mm_packs_epi32(v_a_b, v_a_b);	291 v_a_b = _mm_packs_epi32(v_a_b, v_a_b);

290 v_a_b = _mm_packus_epi16(v_a_b, v_a_b);	292 v_a_b = _mm_packus_epi16(v_a_b, v_a_b);

291 *reinterpret_cast<uint16*>(v_buf) =	293 *reinterpret_cast<uint16_t*>(v_buf) =

292 static_cast<uint16>(_mm_extract_epi16(v_a_b, 0));	294 static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0));

293 v_buf += 2;	295 v_buf += 2;

294	296

295 rgb_buf_1 += 16;	297 rgb_buf_1 += 16;

296 rgb_buf_2 += 16;	298 rgb_buf_2 += 16;

297	299

298 // Move forward by 4 pixels.	300 // Move forward by 4 pixels.

299 width -= 4;	301 width -= 4;

300 }	302 }

301	303

302 // Just use C code to convert the remaining pixels.	304 // Just use C code to convert the remaining pixels.

303 if (width >= 2) {	305 if (width >= 2) {

304 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);	306 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);

305 rgb_buf_1 += 8;	307 rgb_buf_1 += 8;

306 rgb_buf_2 += 8;	308 rgb_buf_2 += 8;

307 y_buf_1 += 2;	309 y_buf_1 += 2;

308 y_buf_2 += 2;	310 y_buf_2 += 2;

309 ++u_buf;	311 ++u_buf;

310 ++v_buf;	312 ++v_buf;

311 width -= 2;	313 width -= 2;

312 }	314 }

313	315

314 if (width)	316 if (width)

315 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);	317 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);

316 }	318 }

317	319

318 extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe,	320 extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe,

319 uint8* yplane,	321 uint8_t* yplane,

320 uint8* uplane,	322 uint8_t* uplane,

321 uint8* vplane,	323 uint8_t* vplane,

322 int width,	324 int width,

323 int height,	325 int height,

324 int rgbstride,	326 int rgbstride,

325 int ystride,	327 int ystride,

326 int uvstride) {	328 int uvstride) {

327 while (height >= 2) {	329 while (height >= 2) {

328 ConvertRGB32ToYUVRow_SSE2(rgbframe,	330 ConvertRGB32ToYUVRow_SSE2(rgbframe,

329 rgbframe + rgbstride,	331 rgbframe + rgbstride,

330 yplane,	332 yplane,

331 yplane + ystride,	333 yplane + ystride,

(...skipping 17 matching lines...) Expand all Loading...
349 yplane += 2;	351 yplane += 2;

350 ++uplane;	352 ++uplane;

351 ++vplane;	353 ++vplane;

352 width -= 2;	354 width -= 2;

353 }	355 }

354	356

355 if (width)	357 if (width)

356 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);	358 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);

357 }	359 }

358	360

359 void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe,	361 void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe,

360 uint8* yplane,	362 uint8_t* yplane,

361 uint8* uplane,	363 uint8_t* uplane,

362 uint8* vplane,	364 uint8_t* vplane,

363 int width,	365 int width,

364 int height,	366 int height,

365 int rgbstride,	367 int rgbstride,

366 int ystride,	368 int ystride,

367 int uvstride) {	369 int uvstride) {

368 while (height >= 2) {	370 while (height >= 2) {

369 int i = 0;	371 int i = 0;

370	372

371 // Convert a 2x2 block.	373 // Convert a 2x2 block.

372 while (i + 2 <= width) {	374 while (i + 2 <= width) {

(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
408 ++vplane;	410 ++vplane;

409 width -= 2;	411 width -= 2;

410 }	412 }

411	413

412 // Handle the last pixel in the last row.	414 // Handle the last pixel in the last row.

413 if (width)	415 if (width)

414 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);	416 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);

415 }	417 }

416	418

417 } // namespace media	419 } // namespace media

OLD	NEW

« no previous file with comments | « media/base/simd/convert_rgb_to_yuv_c.cc ('k') | media/base/simd/convert_rgb_to_yuv_ssse3.h » ('j') | no next file with comments »