OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
6 | 6 |
| 7 #ifdef _DEBUG |
| 8 #include "base/logging.h" |
| 9 #else |
| 10 #define DCHECK(a) |
| 11 #endif |
| 12 |
| 13 // TODO(fbarchard): Make MMX work in DLLs. Currently only works in unittests. |
7 // TODO(fbarchard): Do 64 bit version. | 14 // TODO(fbarchard): Do 64 bit version. |
8 | 15 |
9 extern "C" { | 16 extern "C" { |
| 17 #if USE_MMX |
10 | 18 |
11 #define RGBY(i) { \ | 19 #define RGBY(i) { \ |
12 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 20 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
13 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 21 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
14 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 22 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
15 0 \ | 23 0 \ |
16 } | 24 } |
17 | 25 |
18 #define RGBU(i) { \ | 26 #define RGBU(i) { \ |
19 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ | 27 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ |
20 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ | 28 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ |
21 0, \ | 29 0, \ |
22 static_cast<int16>(256 * 64 - 1) \ | 30 static_cast<int16>(256 * 64 - 1) \ |
23 } | 31 } |
24 | 32 |
25 #define RGBV(i) { \ | 33 #define RGBV(i) { \ |
26 0, \ | 34 0, \ |
27 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ | 35 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ |
28 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ | 36 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ |
29 0 \ | 37 0 \ |
30 } | 38 } |
31 | 39 |
32 #define MMX_ALIGNED(var) \ | 40 #define MMX_ALIGNED(var) var __attribute__((aligned(16))) |
33 var __attribute__ ((section ("__TEXT,__text"))) __attribute__ ((aligned(16))) | |
34 | 41 |
35 MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = { | 42 MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = { |
36 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), | 43 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), |
37 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), | 44 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), |
38 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), | 45 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), |
39 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), | 46 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), |
40 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), | 47 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), |
41 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), | 48 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), |
42 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), | 49 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), |
43 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), | 50 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), |
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
231 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), | 238 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), |
232 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), | 239 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), |
233 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), | 240 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), |
234 }; | 241 }; |
235 | 242 |
236 #undef RGBY | 243 #undef RGBY |
237 #undef RGBU | 244 #undef RGBU |
238 #undef RGBV | 245 #undef RGBV |
239 #undef MMX_ALIGNED | 246 #undef MMX_ALIGNED |
240 | 247 |
241 extern void MacConvertYUVToRGB32Row(const uint8* y_buf, | 248 // TODO(fbarchard): Use the following function instead of |
242 const uint8* u_buf, | 249 // pure assembly to help make code more portable to 64 bit |
243 const uint8* v_buf, | 250 // and Mac, which has different labels. |
244 uint8* rgb_buf, | 251 // no-gcse eliminates the frame pointer, freeing up ebp. |
245 int width, | 252 |
246 int16 *kCoefficientsRgbY); | 253 #if defined(FUTURE_64BIT_VERSION) |
| 254 void __attribute__((optimize("O2", "no-gcse"))) |
| 255 NewFastConvertYUVToRGB32Row(const uint8* y_buf, |
| 256 const uint8* u_buf, |
| 257 const uint8* v_buf, |
| 258 uint8* rgb_buf, |
| 259 int width) { |
| 260 asm( |
| 261 "shr %4\n" |
| 262 "1:\n" |
| 263 "movzb (%1),%%eax\n" |
| 264 "add $0x1,%1\n" |
| 265 "movzb (%2),%%ebx\n" |
| 266 "add $0x1,%2\n" |
| 267 "movq kCoefficientsRgbU(,%%eax,8),%%mm0\n" |
| 268 "movzb (%0),%%eax\n" |
| 269 "paddsw kCoefficientsRgbV(,%%ebx,8),%%mm0\n" |
| 270 "movzb 0x1(%0),%%ebx\n" |
| 271 "movq kCoefficientsRgbY(,%%eax,8),%%mm1\n" |
| 272 "add $0x2,%0\n" |
| 273 "movq kCoefficientsRgbY(,%%ebx,8),%%mm2\n" |
| 274 "paddsw %%mm0,%%mm1\n" |
| 275 "paddsw %%mm0,%%mm2\n" |
| 276 "psraw $0x6,%%mm1\n" |
| 277 "psraw $0x6,%%mm2\n" |
| 278 "packuswb %%mm2,%%mm1\n" |
| 279 "movntq %%mm1,0x0(%3)\n" |
| 280 "add $0x8,%3\n" |
| 281 "sub $0x1,%4\n" |
| 282 "jne 1b\n" |
| 283 : : "r"(y_buf),"r"(u_buf),"r"(v_buf),"r"(rgb_buf),"r"(width) |
| 284 : "eax","ebx"); |
| 285 } |
| 286 #endif |
| 287 |
| 288 extern void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 289 const uint8* u_buf, |
| 290 const uint8* v_buf, |
| 291 uint8* rgb_buf, |
| 292 int width); |
| 293 |
247 __asm__( | 294 __asm__( |
248 " .globl _MacConvertYUVToRGB32Row\n" | 295 " .globl _FastConvertYUVToRGB32Row\n" |
249 "_MacConvertYUVToRGB32Row:\n" | 296 "_FastConvertYUVToRGB32Row:\n" |
250 "pusha\n" | 297 "pusha\n" |
251 "mov 0x24(%esp),%edx\n" | 298 "mov 0x24(%esp),%edx\n" |
252 "mov 0x28(%esp),%edi\n" | 299 "mov 0x28(%esp),%edi\n" |
253 "mov 0x2c(%esp),%esi\n" | 300 "mov 0x2c(%esp),%esi\n" |
254 "mov 0x30(%esp),%ebp\n" | 301 "mov 0x30(%esp),%ebp\n" |
255 "mov 0x38(%esp),%ecx\n" | 302 "mov 0x34(%esp),%ecx\n" |
| 303 "jmp convertend\n" |
256 | 304 |
257 "jmp Lconvertend\n" | 305 "convertloop:" |
258 | |
259 "Lconvertloop:" | |
260 "movzbl (%edi),%eax\n" | 306 "movzbl (%edi),%eax\n" |
261 "add $0x1,%edi\n" | 307 "add $0x1,%edi\n" |
262 "movzbl (%esi),%ebx\n" | 308 "movzbl (%esi),%ebx\n" |
263 "add $0x1,%esi\n" | 309 "add $0x1,%esi\n" |
264 "movq 2048(%ecx,%eax,8),%mm0\n" | 310 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n" |
265 "movzbl (%edx),%eax\n" | 311 "movzbl (%edx),%eax\n" |
266 "paddsw 4096(%ecx,%ebx,8),%mm0\n" | 312 "paddsw _kCoefficientsRgbV(,%ebx,8),%mm0\n" |
267 "movzbl 0x1(%edx),%ebx\n" | 313 "movzbl 0x1(%edx),%ebx\n" |
268 "movq 0(%ecx,%eax,8),%mm1\n" | 314 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n" |
269 "add $0x2,%edx\n" | 315 "add $0x2,%edx\n" |
270 "movq 0(%ecx,%ebx,8),%mm2\n" | 316 "movq _kCoefficientsRgbY(,%ebx,8),%mm2\n" |
271 "paddsw %mm0,%mm1\n" | 317 "paddsw %mm0,%mm1\n" |
272 "paddsw %mm0,%mm2\n" | 318 "paddsw %mm0,%mm2\n" |
273 "psraw $0x6,%mm1\n" | 319 "psraw $0x6,%mm1\n" |
274 "psraw $0x6,%mm2\n" | 320 "psraw $0x6,%mm2\n" |
275 "packuswb %mm2,%mm1\n" | 321 "packuswb %mm2,%mm1\n" |
276 "movntq %mm1,0x0(%ebp)\n" | 322 "movntq %mm1,0x0(%ebp)\n" |
277 "add $0x8,%ebp\n" | 323 "add $0x8,%ebp\n" |
278 "Lconvertend:" | 324 "convertend:" |
279 "sub $0x2,0x34(%esp)\n" | 325 "sub $0x2,%ecx\n" |
280 "jns Lconvertloop\n" | 326 "jns convertloop\n" |
281 | 327 |
282 "and $0x1,0x34(%esp)\n" | 328 "and $0x1,%ecx\n" |
283 "je Lconvertdone\n" | 329 "je convertdone\n" |
284 | 330 |
285 "movzbl (%edi),%eax\n" | 331 "movzbl (%edi),%eax\n" |
286 "movq 2048(%ecx,%eax,8),%mm0\n" | 332 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n" |
287 "movzbl (%esi),%eax\n" | 333 "movzbl (%esi),%eax\n" |
288 "paddsw 4096(%ecx,%eax,8),%mm0\n" | 334 "paddsw _kCoefficientsRgbV(,%eax,8),%mm0\n" |
289 "movzbl (%edx),%eax\n" | 335 "movzbl (%edx),%eax\n" |
290 "movq 0(%ecx,%eax,8),%mm1\n" | 336 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n" |
291 "paddsw %mm0,%mm1\n" | 337 "paddsw %mm0,%mm1\n" |
292 "psraw $0x6,%mm1\n" | 338 "psraw $0x6,%mm1\n" |
293 "packuswb %mm1,%mm1\n" | 339 "packuswb %mm1,%mm1\n" |
294 "movd %mm1,0x0(%ebp)\n" | 340 "movd %mm1,0x0(%ebp)\n" |
295 "Lconvertdone:\n" | 341 "convertdone:" |
296 "popa\n" | 342 "popa\n" |
297 "ret\n" | 343 "ret\n" |
298 ); | 344 ); |
299 | 345 |
300 void FastConvertYUVToRGB32Row(const uint8* y_buf, | |
301 const uint8* u_buf, | |
302 const uint8* v_buf, | |
303 uint8* rgb_buf, | |
304 int width) { | |
305 MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, | |
306 &kCoefficientsRgbY[0][0]); | |
307 } | |
308 | 346 |
309 extern void MacScaleYUVToRGB32Row(const uint8* y_buf, | 347 extern void ScaleYUVToRGB32Row(const uint8* y_buf, |
310 const uint8* u_buf, | 348 const uint8* u_buf, |
311 const uint8* v_buf, | 349 const uint8* v_buf, |
312 uint8* rgb_buf, | 350 uint8* rgb_buf, |
313 int width, | 351 int width, |
314 int scaled_dx, | 352 int scaled_dx); |
315 int16 *kCoefficientsRgbY); | |
316 | 353 |
317 __asm__( | 354 __asm__( |
318 " .globl _MacScaleYUVToRGB32Row\n" | 355 " .globl _ScaleYUVToRGB32Row\n" |
319 "_MacScaleYUVToRGB32Row:\n" | 356 "_ScaleYUVToRGB32Row:\n" |
320 "pusha\n" | 357 "pusha\n" |
321 "mov 0x24(%esp),%edx\n" | 358 "mov 0x24(%esp),%edx\n" |
322 "mov 0x28(%esp),%edi\n" | 359 "mov 0x28(%esp),%edi\n" |
323 "mov 0x2c(%esp),%esi\n" | 360 "mov 0x2c(%esp),%esi\n" |
324 "mov 0x30(%esp),%ebp\n" | 361 "mov 0x30(%esp),%ebp\n" |
325 "mov 0x3c(%esp),%ecx\n" | 362 "mov 0x34(%esp),%ecx\n" |
326 "xor %ebx,%ebx\n" | 363 "xor %ebx,%ebx\n" |
327 "jmp Lscaleend\n" | 364 "jmp scaleend\n" |
328 | 365 |
329 "Lscaleloop:" | 366 "scaleloop:" |
330 "mov %ebx,%eax\n" | 367 "mov %ebx,%eax\n" |
331 "sar $0x5,%eax\n" | 368 "sar $0x5,%eax\n" |
332 "movzbl (%edi,%eax,1),%eax\n" | 369 "movzbl (%edi,%eax,1),%eax\n" |
333 "movq 2048(%ecx,%eax,8),%mm0\n" | 370 "movq kCoefficientsRgbU(,%eax,8),%mm0\n" |
334 "mov %ebx,%eax\n" | 371 "mov %ebx,%eax\n" |
335 "sar $0x5,%eax\n" | 372 "sar $0x5,%eax\n" |
336 "movzbl (%esi,%eax,1),%eax\n" | 373 "movzbl (%esi,%eax,1),%eax\n" |
337 "paddsw 4096(%ecx,%eax,8),%mm0\n" | 374 "paddsw kCoefficientsRgbV(,%eax,8),%mm0\n" |
338 "mov %ebx,%eax\n" | 375 "mov %ebx,%eax\n" |
339 "add 0x38(%esp),%ebx\n" | 376 "add 0x38(%esp),%ebx\n" |
340 "sar $0x4,%eax\n" | 377 "sar $0x4,%eax\n" |
341 "movzbl (%edx,%eax,1),%eax\n" | 378 "movzbl (%edx,%eax,1),%eax\n" |
342 "movq 0(%ecx,%eax,8),%mm1\n" | 379 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" |
343 "mov %ebx,%eax\n" | 380 "mov %ebx,%eax\n" |
344 "add 0x38(%esp),%ebx\n" | 381 "add 0x38(%esp),%ebx\n" |
345 "sar $0x4,%eax\n" | 382 "sar $0x4,%eax\n" |
346 "movzbl (%edx,%eax,1),%eax\n" | 383 "movzbl (%edx,%eax,1),%eax\n" |
347 "movq 0(%ecx,%eax,8),%mm2\n" | 384 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" |
348 "paddsw %mm0,%mm1\n" | 385 "paddsw %mm0,%mm1\n" |
349 "paddsw %mm0,%mm2\n" | 386 "paddsw %mm0,%mm2\n" |
350 "psraw $0x6,%mm1\n" | 387 "psraw $0x6,%mm1\n" |
351 "psraw $0x6,%mm2\n" | 388 "psraw $0x6,%mm2\n" |
352 "packuswb %mm2,%mm1\n" | 389 "packuswb %mm2,%mm1\n" |
353 "movntq %mm1,0x0(%ebp)\n" | 390 "movntq %mm1,0x0(%ebp)\n" |
354 "add $0x8,%ebp\n" | 391 "add $0x8,%ebp\n" |
355 "Lscaleend:" | 392 "scaleend:" |
356 "sub $0x2,0x34(%esp)\n" | 393 "sub $0x2,%ecx\n" |
357 "jns Lscaleloop\n" | 394 "jns scaleloop\n" |
358 | 395 |
359 "and $0x1,0x34(%esp)\n" | 396 "and $0x1,%ecx\n" |
360 "je Lscaledone\n" | 397 "je scaledone\n" |
361 | 398 |
362 "mov %ebx,%eax\n" | 399 "mov %ebx,%eax\n" |
363 "sar $0x5,%eax\n" | 400 "sar $0x5,%eax\n" |
364 "movzbl (%edi,%eax,1),%eax\n" | 401 "movzbl (%edi,%eax,1),%eax\n" |
365 "movq 2048(%ecx,%eax,8),%mm0\n" | 402 "movq kCoefficientsRgbU(,%eax,8),%mm0\n" |
366 "mov %ebx,%eax\n" | 403 "mov %ebx,%eax\n" |
367 "sar $0x5,%eax\n" | 404 "sar $0x5,%eax\n" |
368 "movzbl (%esi,%eax,1),%eax\n" | 405 "movzbl (%esi,%eax,1),%eax\n" |
369 "paddsw 4096(%ecx,%eax,8),%mm0\n" | 406 "paddsw kCoefficientsRgbV(,%eax,8),%mm0\n" |
370 "mov %ebx,%eax\n" | 407 "mov %ebx,%eax\n" |
371 "sar $0x4,%eax\n" | 408 "sar $0x4,%eax\n" |
372 "movzbl (%edx,%eax,1),%eax\n" | 409 "movzbl (%edx,%eax,1),%eax\n" |
373 "movq 0(%ecx,%eax,8),%mm1\n" | 410 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" |
374 "mov %ebx,%eax\n" | 411 "mov %ebx,%eax\n" |
375 "sar $0x4,%eax\n" | 412 "sar $0x4,%eax\n" |
376 "movzbl (%edx,%eax,1),%eax\n" | 413 "movzbl (%edx,%eax,1),%eax\n" |
377 "movq 0(%ecx,%eax,8),%mm2\n" | 414 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" |
378 "paddsw %mm0,%mm1\n" | 415 "paddsw %mm0,%mm1\n" |
379 "paddsw %mm0,%mm2\n" | 416 "paddsw %mm0,%mm2\n" |
380 "psraw $0x6,%mm1\n" | 417 "psraw $0x6,%mm1\n" |
381 "psraw $0x6,%mm2\n" | 418 "psraw $0x6,%mm2\n" |
382 "packuswb %mm2,%mm1\n" | 419 "packuswb %mm2,%mm1\n" |
383 "movd %mm1,0x0(%ebp)\n" | 420 "movd %mm1,0x0(%ebp)\n" |
384 | 421 |
385 "Lscaledone:" | 422 "scaledone:" |
386 "popa\n" | 423 "popa\n" |
387 "ret\n" | 424 "ret\n" |
388 ); | 425 ); |
389 | 426 |
| 427 #else // USE_MMX |
390 | 428 |
| 429 // Reference version of YUV converter. |
| 430 static const int kClipTableSize = 256; |
| 431 static const int kClipOverflow = 288; // Cb max is 535. |
| 432 |
| 433 static uint8 kRgbClipTable[kClipOverflow + |
| 434 kClipTableSize + |
| 435 kClipOverflow] = { |
| 436 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values |
| 437 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0. |
| 438 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 448 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 449 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 450 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 451 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 452 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 453 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 454 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 455 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 456 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 457 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 458 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 459 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 460 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 461 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 462 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 463 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 464 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 465 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 466 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 467 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 468 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 469 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 471 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 472 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // Unclipped values. |
| 473 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
| 474 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, |
| 475 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, |
| 476 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, |
| 477 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, |
| 478 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, |
| 479 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, |
| 480 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, |
| 481 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, |
| 482 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, |
| 483 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, |
| 484 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, |
| 485 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, |
| 486 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
| 487 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, |
| 488 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| 489 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, |
| 490 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| 491 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, |
| 492 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, |
| 493 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, |
| 494 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, |
| 495 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, |
| 496 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, |
| 497 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, |
| 498 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, |
| 499 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, |
| 500 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, |
| 501 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, |
| 502 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, |
| 503 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, |
| 504 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 288 overflow values |
| 505 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // clipped to 255. |
| 506 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 507 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 508 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 509 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 510 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 511 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 512 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 513 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 514 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 515 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 516 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 517 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 518 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 519 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 520 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 521 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 522 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 523 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 524 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 525 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 526 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 527 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 528 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 529 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 530 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 531 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 532 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 533 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 534 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 535 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 536 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 537 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 538 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 539 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
| 540 }; |
| 541 |
| 542 // Clip an rgb channel value to 0..255 range. |
| 543 // Source is signed fixed point 8.8. |
| 544 // Table allows for values to underflow or overflow by kClipOverflow (288). |
| 545 // Therefore the integer part (value >> 8) may range from -288 to 543. |
| 546 // Output clips to unsigned 0 to 255. |
| 547 static inline uint32 clip(int32 value) { |
| 548 DCHECK(((value >> 8) + kClipOverflow) >= 0); |
| 549 DCHECK(((value >> 8) + kClipOverflow) < |
| 550 (kClipOverflow + kClipTableSize + kClipOverflow)); |
| 551 return static_cast<uint32>(kRgbClipTable[((value) >> 8) + kClipOverflow]); |
| 552 } |
| 553 |
| 554 static inline void YuvPixel(uint8 y, |
| 555 uint8 u, |
| 556 uint8 v, |
| 557 uint8* rgb_buf) { |
| 558 int32 d = static_cast<int32>(u) - 128; |
| 559 int32 e = static_cast<int32>(v) - 128; |
| 560 |
| 561 int32 cb = (516 * d + 128); |
| 562 int32 cg = (- 100 * d - 208 * e + 128); |
| 563 int32 cr = (409 * e + 128); |
| 564 |
| 565 int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128); |
| 566 *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) | |
| 567 (clip(C298a + cg) << 8) | |
| 568 (clip(C298a + cr) << 16) | |
| 569 (0xff000000); |
| 570 } |
| 571 |
| 572 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 573 const uint8* u_buf, |
| 574 const uint8* v_buf, |
| 575 uint8* rgb_buf, |
| 576 int width) { |
| 577 for (int x = 0; x < width; x += 2) { |
| 578 uint8 u = u_buf[x >> 1]; |
| 579 uint8 v = v_buf[x >> 1]; |
| 580 uint8 y0 = y_buf[x]; |
| 581 YuvPixel(y0, u, v, rgb_buf); |
| 582 if ((x + 1) < width) { |
| 583 uint8 y1 = y_buf[x + 1]; |
| 584 YuvPixel(y1, u, v, rgb_buf + 4); |
| 585 } |
| 586 rgb_buf += 8; // Advance 2 pixels. |
| 587 } |
| 588 } |
| 589 |
| 590 // 28.4 fixed point is used. A shift by 4 isolates the integer. |
| 591 // A shift by 5 is used to further subsample the chrominance channels. |
| 592 // & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits, |
| 593 // for 1/4 pixel accurate interpolation (not used by the loop below). |
391 void ScaleYUVToRGB32Row(const uint8* y_buf, | 594 void ScaleYUVToRGB32Row(const uint8* y_buf, |
392 const uint8* u_buf, | 595 const uint8* u_buf, |
393 const uint8* v_buf, | 596 const uint8* v_buf, |
394 uint8* rgb_buf, | 597 uint8* rgb_buf, |
395 int width, | 598 int width, |
396 int scaled_dx) { | 599 int scaled_dx) { |
397 | 600 int scaled_x = 0; |
398 MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, | 601 for (int x = 0; x < width; ++x) { |
399 &kCoefficientsRgbY[0][0]); | 602 uint8 u = u_buf[scaled_x >> 5]; |
| 603 uint8 v = v_buf[scaled_x >> 5]; |
| 604 uint8 y0 = y_buf[scaled_x >> 4]; |
| 605 YuvPixel(y0, u, v, rgb_buf); |
| 606 rgb_buf += 4; |
| 607 scaled_x += scaled_dx; |
| 608 } |
400 } | 609 } |
401 | 610 #endif // USE_MMX |
402 } // extern "C" | 611 } // extern "C" |
403 | 612 |
OLD | NEW |