media/base/simd/convert_rgb_to_yuv_sse2.cc - Issue 2694113002: Delete media/base/yuv_convert and dependents. Prefer libyuv.

Side by Side Diff: media/base/simd/convert_rgb_to_yuv_sse2.cc

Issue 2694113002: Delete media/base/yuv_convert and dependents. Prefer libyuv. (Closed)

Patch Set: Fix media_unittests. Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include <stdint.h>

6

7 #include "build/build_config.h"

8 #include "media/base/simd/convert_rgb_to_yuv.h"

9

10 #if defined(COMPILER_MSVC)

11 #include <intrin.h>

12 #else

13 #include <mmintrin.h>

14 #include <emmintrin.h>

15 #endif

16

// SIMD_ALIGNED(var) declares |var| with 16-byte alignment. The two compiler
// families spell the request differently: GCC-compatible compilers take a
// trailing attribute, MSVC takes a leading __declspec.
#if !defined(COMPILER_MSVC)
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#endif

22

23 namespace media {

24

25 #define FIX_SHIFT 12

26 #define FIX(x) ((x) * (1 << FIX_SHIFT))

27

28 // Define a convenient macro to do static cast.

29 #define INT16_FIX(x) static_cast<int16_t>(FIX(x))

30

31 // Android's pixel layout is RGBA, while other platforms

32 // are BGRA.

33 #if defined(OS_ANDROID)

34 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {

35 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,

36 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,

37 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,

38 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,

39 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,

40 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,

41 };

42 #else

43 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {

44 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,

45 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,

46 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,

47 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,

48 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,

49 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,

50 };

51 #endif

52

53 #undef INT16_FIX

54

55 // This is the final offset for the conversion from signed yuv values to

56 // unsigned values. It is arranged so that offset of 16 is applied to Y

57 // components and 128 is added to UV components for 2 pixels.

58 SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16};

59

// Saturates |value| to the range [0, 255] and returns it as a byte.
static inline uint8_t Clamp(int value) {
  const int floored = value < 0 ? 0 : value;
  const int bounded = floored > 255 ? 255 : floored;
  return static_cast<uint8_t>(bounded);
}

67

68 static inline uint8_t RGBToY(int r, int g, int b) {

69 int y = ConvertRGBAToYUV_kTable[0] * b +

70 ConvertRGBAToYUV_kTable[1] * g +

71 ConvertRGBAToYUV_kTable[2] * r;

72 y >>= FIX_SHIFT;

73 return Clamp(y + 16);

74 }

75

76 static inline uint8_t RGBToU(int r, int g, int b, int shift) {

77 int u = ConvertRGBAToYUV_kTable[8] * b +

78 ConvertRGBAToYUV_kTable[9] * g +

79 ConvertRGBAToYUV_kTable[10] * r;

80 u >>= FIX_SHIFT + shift;

81 return Clamp(u + 128);

82 }

83

84 static inline uint8_t RGBToV(int r, int g, int b, int shift) {

85 int v = ConvertRGBAToYUV_kTable[16] * b +

86 ConvertRGBAToYUV_kTable[17] * g +

87 ConvertRGBAToYUV_kTable[18] * r;

88 v >>= FIX_SHIFT + shift;

89 return Clamp(v + 128);

90 }

91

// Consumes one 4-byte pixel from |rgb_buf| and emits its Y sample to |y_buf|,
// advancing both pointers (the pixel's fourth byte is skipped).
//
// The expansion relies on locals declared by the caller: ints |b|, |g| and
// |r| receive the pixel's first three bytes, and |sum_b|, |sum_g| and |sum_r|
// accumulate them so the caller can compute subsampled chroma afterwards.
//
// The body is wrapped in do { } while (0) so that an invocation followed by a
// semicolon is exactly one statement and stays correct as the body of an
// unbraced if/else or loop.
#define CONVERT_Y(rgb_buf, y_buf) \
  do {                            \
    b = *(rgb_buf)++;             \
    g = *(rgb_buf)++;             \
    r = *(rgb_buf)++;             \
    ++(rgb_buf);                  \
    sum_b += b;                   \
    sum_g += g;                   \
    sum_r += r;                   \
    *(y_buf)++ = RGBToY(r, g, b); \
  } while (0)

101

102 static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1,

103 const uint8_t* rgb_buf_2,

104 uint8_t* y_buf_1,

105 uint8_t* y_buf_2,

106 uint8_t* u_buf,

107 uint8_t* v_buf) {

108 int sum_b = 0;

109 int sum_g = 0;

110 int sum_r = 0;

111 int r, g, b;

112

113

114

115 CONVERT_Y(rgb_buf_1, y_buf_1);

116 CONVERT_Y(rgb_buf_1, y_buf_1);

117 CONVERT_Y(rgb_buf_2, y_buf_2);

118 CONVERT_Y(rgb_buf_2, y_buf_2);

119 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2);

120 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2);

121 }

122

123 static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1,

124 const uint8_t* rgb_buf_2,

125 uint8_t* y_buf_1,

126 uint8_t* y_buf_2,

127 uint8_t* u_buf,

128 uint8_t* v_buf) {

129 int sum_b = 0;

130 int sum_g = 0;

131 int sum_r = 0;

132 int r, g, b;

133

134 CONVERT_Y(rgb_buf_1, y_buf_1);

135 CONVERT_Y(rgb_buf_2, y_buf_2);

136 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);

137 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);

138 }

139

140 static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf,

141 uint8_t* y_buf,

142 uint8_t* u_buf,

143 uint8_t* v_buf) {

144 int sum_b = 0;

145 int sum_g = 0;

146 int sum_r = 0;

147 int r, g, b;

148

149 CONVERT_Y(rgb_buf, y_buf);

150 CONVERT_Y(rgb_buf, y_buf);

151 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);

152 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);

153 }

154

155 static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf,

156 uint8_t* y_buf,

157 uint8_t* u_buf,

158 uint8_t* v_buf) {

159 int sum_b = 0;

160 int sum_g = 0;

161 int sum_r = 0;

162 int r, g, b;

163

164 CONVERT_Y(rgb_buf, y_buf);

165 *u_buf++ = RGBToU(r, g, b, 0);

166 *v_buf++ = RGBToV(r, g, b, 0);

167 }

168

169 static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1,

170 const uint8_t* rgb_buf_2,

171 uint8_t* y_buf_1,

172 uint8_t* y_buf_2,

173 uint8_t* u_buf,

174 uint8_t* v_buf,

175 int width) {

176 while (width >= 4) {

177 // Name for the Y pixels:

178 // Row 1: a b c d

179 // Row 2: e f g h

180 //

181 // First row 4 pixels.

182 __m128i rgb_row_1 = _mm_loadu_si128(

183 reinterpret_cast<const __m128i*>(rgb_buf_1));

184 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1);

185

186 __m128i y_table = _mm_load_si128(

187 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable));

188

189 __m128i rgb_a_b = _mm_unpackhi_epi8(rgb_row_1, zero_1);

190 rgb_a_b = _mm_madd_epi16(rgb_a_b, y_table);

191

192 __m128i rgb_c_d = _mm_unpacklo_epi8(rgb_row_1, zero_1);

193 rgb_c_d = _mm_madd_epi16(rgb_c_d, y_table);

194

195 // Do a crazh shuffle so that we get:

196 // v------------ Multiply Add

197 // BG: a b c d

198 // A0: a b c d

199 __m128i bg_abcd = _mm_castps_si128(

200 _mm_shuffle_ps(

201 _mm_castsi128_ps(rgb_c_d),

202 _mm_castsi128_ps(rgb_a_b),

203 (3 << 6) \| (1 << 4) \| (3 << 2) \| 1));

204 __m128i r_abcd = _mm_castps_si128(

205 _mm_shuffle_ps(

206 _mm_castsi128_ps(rgb_c_d),

207 _mm_castsi128_ps(rgb_a_b),

208 (2 << 6) \| (2 << 2)));

209 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd);

210

211 // Down shift back to 8bits range.

212 __m128i y_offset = _mm_load_si128(

213 reinterpret_cast<const __m128i*>(kYOffset));

214 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT);

215 y_abcd = _mm_add_epi32(y_abcd, y_offset);

216 y_abcd = _mm_packs_epi32(y_abcd, y_abcd);

217 y_abcd = _mm_packus_epi16(y_abcd, y_abcd);

218 reinterpret_cast<uint32_t>(y_buf_1) = _mm_cvtsi128_si32(y_abcd);

219 y_buf_1 += 4;

220

221 // Second row 4 pixels.

222 __m128i rgb_row_2 = _mm_loadu_si128(

223 reinterpret_cast<const __m128i*>(rgb_buf_2));

224 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2);

225 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2);

226 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2);

227

228 // Add two rows together.

229 __m128i rgb_ae_bf =

230 _mm_add_epi16(_mm_unpackhi_epi8(rgb_row_1, zero_2), rgb_e_f);

231 __m128i rgb_cg_dh =

232 _mm_add_epi16(_mm_unpacklo_epi8(rgb_row_1, zero_2), rgb_g_h);

233

234 // Multiply add like the previous row.

235 rgb_e_f = _mm_madd_epi16(rgb_e_f, y_table);

236 rgb_g_h = _mm_madd_epi16(rgb_g_h, y_table);

237

238 __m128i bg_efgh = _mm_castps_si128(

239 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),

240 _mm_castsi128_ps(rgb_e_f),

241 (3 << 6) \| (1 << 4) \| (3 << 2) \| 1));

242 __m128i r_efgh = _mm_castps_si128(

243 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),

244 _mm_castsi128_ps(rgb_e_f),

245 (2 << 6) \| (2 << 2)));

246 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh);

247 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT);

248 y_efgh = _mm_add_epi32(y_efgh, y_offset);

249 y_efgh = _mm_packs_epi32(y_efgh, y_efgh);

250 y_efgh = _mm_packus_epi16(y_efgh, y_efgh);

251 reinterpret_cast<uint32_t>(y_buf_2) = _mm_cvtsi128_si32(y_efgh);

252 y_buf_2 += 4;

253

254 __m128i rgb_ae_cg = _mm_castps_si128(

255 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),

256 _mm_castsi128_ps(rgb_ae_bf),

257 (3 << 6) \| (2 << 4) \| (3 << 2) \| 2));

258 __m128i rgb_bf_dh = _mm_castps_si128(

259 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),

260 _mm_castsi128_ps(rgb_ae_bf),

261 (1 << 6) \| (1 << 2)));

262

263 // This is a 2x2 subsampling for 2 pixels.

264 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh);

265

266 // Do a multiply add with U table.

267 __m128i u_a_b = _mm_madd_epi16(

268 rgb_abef_cdgh,

269 _mm_load_si128(

270 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8)));

271 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) \| 1)),

272 _mm_shuffle_epi32(u_a_b, (2 << 2)));

273 // Right shift 14 because of 12 from fixed point and 2 from subsampling.

274 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2);

275 __m128i uv_offset = _mm_slli_epi32(y_offset, 3);

276 u_a_b = _mm_add_epi32(u_a_b, uv_offset);

277 u_a_b = _mm_packs_epi32(u_a_b, u_a_b);

278 u_a_b = _mm_packus_epi16(u_a_b, u_a_b);

279 reinterpret_cast<uint16_t>(u_buf) =

280 static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0));

281 u_buf += 2;

282

283 __m128i v_a_b = _mm_madd_epi16(

284 rgb_abef_cdgh,

285 _mm_load_si128(

286 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16)));

287 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) \| 1)),

288 _mm_shuffle_epi32(v_a_b, (2 << 2)));

289 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2);

290 v_a_b = _mm_add_epi32(v_a_b, uv_offset);

291 v_a_b = _mm_packs_epi32(v_a_b, v_a_b);

292 v_a_b = _mm_packus_epi16(v_a_b, v_a_b);

293 reinterpret_cast<uint16_t>(v_buf) =

294 static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0));

295 v_buf += 2;

296

297 rgb_buf_1 += 16;

298 rgb_buf_2 += 16;

299

300 // Move forward by 4 pixels.

301 width -= 4;

302 }

303

304 // Just use C code to convert the remaining pixels.

305 if (width >= 2) {

306 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);

307 rgb_buf_1 += 8;

308 rgb_buf_2 += 8;

309 y_buf_1 += 2;

310 y_buf_2 += 2;

311 ++u_buf;

312 ++v_buf;

313 width -= 2;

314 }

315

316 if (width)

317 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);

318 }

319

320 extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe,

321 uint8_t* yplane,

322 uint8_t* uplane,

323 uint8_t* vplane,

324 int width,

325 int height,

326 int rgbstride,

327 int ystride,

328 int uvstride) {

329 while (height >= 2) {

330 ConvertRGB32ToYUVRow_SSE2(rgbframe,

331 rgbframe + rgbstride,

332 yplane,

333 yplane + ystride,

334 uplane,

335 vplane,

336 width);

337 rgbframe += 2 * rgbstride;

338 yplane += 2 * ystride;

339 uplane += uvstride;

340 vplane += uvstride;

341 height -= 2;

342 }

343

344 if (!height)

345 return;

346

347 // Handle the last row.

348 while (width >= 2) {

349 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane);

350 rgbframe += 8;

351 yplane += 2;

352 ++uplane;

353 ++vplane;

354 width -= 2;

355 }

356

357 if (width)

358 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);

359 }

360

361 void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe,

362 uint8_t* yplane,

363 uint8_t* uplane,

364 uint8_t* vplane,

365 int width,

366 int height,

367 int rgbstride,

368 int ystride,

369 int uvstride) {

370 while (height >= 2) {

371 int i = 0;

372

373 // Convert a 2x2 block.

374 while (i + 2 <= width) {

375 ConvertRGBToYUV_V2H2(rgbframe + i * 4,

376 rgbframe + rgbstride + i * 4,

377 yplane + i,

378 yplane + ystride + i,

379 uplane + i / 2,

380 vplane + i / 2);

381 i += 2;

382 }

383

384 // Convert the last pixel of two rows.

385 if (i < width) {

386 ConvertRGBToYUV_V2H1(rgbframe + i * 4,

387 rgbframe + rgbstride + i * 4,

388 yplane + i,

389 yplane + ystride + i,

390 uplane + i / 2,

391 vplane + i / 2);

392 }

393

394 rgbframe += 2 * rgbstride;

395 yplane += 2 * ystride;

396 uplane += uvstride;

397 vplane += uvstride;

398 height -= 2;

399 }

400

401 if (!height)

402 return;

403

404 // Handle the last row.

405 while (width >= 2) {

406 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane);

407 rgbframe += 8;

408 yplane += 2;

409 ++uplane;

410 ++vplane;

411 width -= 2;

412 }

413

414 // Handle the last pixel in the last row.

415 if (width)

416 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);

417 }

418

419 } // namespace media

OLD	NEW

« no previous file with comments | « media/base/simd/convert_rgb_to_yuv_c.cc ('k') | media/base/simd/convert_rgb_to_yuv_ssse3.h » ('j') | no next file with comments »