OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
6 | 6 |
7 #ifdef _DEBUG | |
8 #include "base/logging.h" | |
9 #else | |
10 #define DCHECK(a) | |
11 #endif | |
12 | |
13 // TODO(fbarchard): Make MMX work in DLLs. Currently only works in unittests. | |
14 // TODO(fbarchard): Do 64 bit version. | 7 // TODO(fbarchard): Do 64 bit version. |
15 | 8 |
16 extern "C" { | 9 extern "C" { |
17 #if USE_MMX | |
18 | 10 |
19 #define RGBY(i) { \ | 11 #define RGBY(i) { \ |
20 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 12 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
21 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 13 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
22 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | 14 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
23 0 \ | 15 0 \ |
24 } | 16 } |
25 | 17 |
26 #define RGBU(i) { \ | 18 #define RGBU(i) { \ |
27 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ | 19 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ |
28 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ | 20 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ |
29 0, \ | 21 0, \ |
30 static_cast<int16>(256 * 64 - 1) \ | 22 static_cast<int16>(256 * 64 - 1) \ |
31 } | 23 } |
32 | 24 |
33 #define RGBV(i) { \ | 25 #define RGBV(i) { \ |
34 0, \ | 26 0, \ |
35 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ | 27 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ |
36 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ | 28 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ |
37 0 \ | 29 0 \ |
38 } | 30 } |
39 | 31 |
40 #define MMX_ALIGNED(var) var __attribute__((aligned(16))) | 32 #define MMX_ALIGNED(var) \ |
| 33 var __attribute__ ((section ("__TEXT,__text"))) __attribute__ ((aligned(16))) |
41 | 34 |
42 MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = { | 35 MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = { |
43 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), | 36 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), |
44 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), | 37 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), |
45 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), | 38 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), |
46 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), | 39 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), |
47 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), | 40 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), |
48 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), | 41 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), |
49 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), | 42 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), |
50 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), | 43 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), |
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
238 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), | 231 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), |
239 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), | 232 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), |
240 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), | 233 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), |
241 }; | 234 }; |
242 | 235 |
243 #undef RGBY | 236 #undef RGBY |
244 #undef RGBU | 237 #undef RGBU |
245 #undef RGBV | 238 #undef RGBV |
246 #undef MMX_ALIGNED | 239 #undef MMX_ALIGNED |
247 | 240 |
248 // TODO(fbarchard): Use the following function instead of | 241 extern void MacConvertYUVToRGB32Row(const uint8* y_buf, |
249 // pure assembly to help make code more portable to 64 bit | 242 const uint8* u_buf, |
250 // and Mac, which has different labels. | 243 const uint8* v_buf, |
251 // no-gcse eliminates the frame pointer, freeing up ebp. | 244 uint8* rgb_buf, |
252 | 245 int width, |
253 #if defined(FUTURE_64BIT_VERSION) | 246 int16 *kCoefficientsRgbY); |
254 void __attribute__((optimize("O2", "no-gcse"))) | |
255 NewFastConvertYUVToRGB32Row(const uint8* y_buf, | |
256 const uint8* u_buf, | |
257 const uint8* v_buf, | |
258 uint8* rgb_buf, | |
259 int width) { | |
260 asm( | |
261 "shr %4\n" | |
262 "1:\n" | |
263 "movzb (%1),%%eax\n" | |
264 "add $0x1,%1\n" | |
265 "movzb (%2),%%ebx\n" | |
266 "add $0x1,%2\n" | |
267 "movq kCoefficientsRgbU(,%%eax,8),%%mm0\n" | |
268 "movzb (%0),%%eax\n" | |
269 "paddsw kCoefficientsRgbV(,%%ebx,8),%%mm0\n" | |
270 "movzb 0x1(%0),%%ebx\n" | |
271 "movq kCoefficientsRgbY(,%%eax,8),%%mm1\n" | |
272 "add $0x2,%0\n" | |
273 "movq kCoefficientsRgbY(,%%ebx,8),%%mm2\n" | |
274 "paddsw %%mm0,%%mm1\n" | |
275 "paddsw %%mm0,%%mm2\n" | |
276 "psraw $0x6,%%mm1\n" | |
277 "psraw $0x6,%%mm2\n" | |
278 "packuswb %%mm2,%%mm1\n" | |
279 "movntq %%mm1,0x0(%3)\n" | |
280 "add $0x8,%3\n" | |
281 "sub $0x1,%4\n" | |
282 "jne 1b\n" | |
283 : : "r"(y_buf),"r"(u_buf),"r"(v_buf),"r"(rgb_buf),"r"(width) | |
284 : "eax","ebx"); | |
285 } | |
286 #endif | |
287 | |
288 extern void FastConvertYUVToRGB32Row(const uint8* y_buf, | |
289 const uint8* u_buf, | |
290 const uint8* v_buf, | |
291 uint8* rgb_buf, | |
292 int width); | |
293 | |
294 __asm__( | 247 __asm__( |
295 " .globl _FastConvertYUVToRGB32Row\n" | 248 " .globl _MacConvertYUVToRGB32Row\n" |
296 "_FastConvertYUVToRGB32Row:\n" | 249 "_MacConvertYUVToRGB32Row:\n" |
297 "pusha\n" | 250 "pusha\n" |
298 "mov 0x24(%esp),%edx\n" | 251 "mov 0x24(%esp),%edx\n" |
299 "mov 0x28(%esp),%edi\n" | 252 "mov 0x28(%esp),%edi\n" |
300 "mov 0x2c(%esp),%esi\n" | 253 "mov 0x2c(%esp),%esi\n" |
301 "mov 0x30(%esp),%ebp\n" | 254 "mov 0x30(%esp),%ebp\n" |
302 "mov 0x34(%esp),%ecx\n" | 255 "mov 0x38(%esp),%ecx\n" |
303 "jmp convertend\n" | |
304 | 256 |
305 "convertloop:" | 257 "jmp Lconvertend\n" |
| 258 |
| 259 "Lconvertloop:" |
306 "movzbl (%edi),%eax\n" | 260 "movzbl (%edi),%eax\n" |
307 "add $0x1,%edi\n" | 261 "add $0x1,%edi\n" |
308 "movzbl (%esi),%ebx\n" | 262 "movzbl (%esi),%ebx\n" |
309 "add $0x1,%esi\n" | 263 "add $0x1,%esi\n" |
310 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n" | 264 "movq 2048(%ecx,%eax,8),%mm0\n" |
311 "movzbl (%edx),%eax\n" | 265 "movzbl (%edx),%eax\n" |
312 "paddsw _kCoefficientsRgbV(,%ebx,8),%mm0\n" | 266 "paddsw 4096(%ecx,%ebx,8),%mm0\n" |
313 "movzbl 0x1(%edx),%ebx\n" | 267 "movzbl 0x1(%edx),%ebx\n" |
314 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n" | 268 "movq 0(%ecx,%eax,8),%mm1\n" |
315 "add $0x2,%edx\n" | 269 "add $0x2,%edx\n" |
316 "movq _kCoefficientsRgbY(,%ebx,8),%mm2\n" | 270 "movq 0(%ecx,%ebx,8),%mm2\n" |
317 "paddsw %mm0,%mm1\n" | 271 "paddsw %mm0,%mm1\n" |
318 "paddsw %mm0,%mm2\n" | 272 "paddsw %mm0,%mm2\n" |
319 "psraw $0x6,%mm1\n" | 273 "psraw $0x6,%mm1\n" |
320 "psraw $0x6,%mm2\n" | 274 "psraw $0x6,%mm2\n" |
321 "packuswb %mm2,%mm1\n" | 275 "packuswb %mm2,%mm1\n" |
322 "movntq %mm1,0x0(%ebp)\n" | 276 "movntq %mm1,0x0(%ebp)\n" |
323 "add $0x8,%ebp\n" | 277 "add $0x8,%ebp\n" |
324 "convertend:" | 278 "Lconvertend:" |
325 "sub $0x2,%ecx\n" | 279 "sub $0x2,0x34(%esp)\n" |
326 "jns convertloop\n" | 280 "jns Lconvertloop\n" |
327 | 281 |
328 "and $0x1,%ecx\n" | 282 "and $0x1,0x34(%esp)\n" |
329 "je convertdone\n" | 283 "je Lconvertdone\n" |
330 | 284 |
331 "movzbl (%edi),%eax\n" | 285 "movzbl (%edi),%eax\n" |
332 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n" | 286 "movq 2048(%ecx,%eax,8),%mm0\n" |
333 "movzbl (%esi),%eax\n" | 287 "movzbl (%esi),%eax\n" |
334 "paddsw _kCoefficientsRgbV(,%eax,8),%mm0\n" | 288 "paddsw 4096(%ecx,%eax,8),%mm0\n" |
335 "movzbl (%edx),%eax\n" | 289 "movzbl (%edx),%eax\n" |
336 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n" | 290 "movq 0(%ecx,%eax,8),%mm1\n" |
337 "paddsw %mm0,%mm1\n" | 291 "paddsw %mm0,%mm1\n" |
338 "psraw $0x6,%mm1\n" | 292 "psraw $0x6,%mm1\n" |
339 "packuswb %mm1,%mm1\n" | 293 "packuswb %mm1,%mm1\n" |
340 "movd %mm1,0x0(%ebp)\n" | 294 "movd %mm1,0x0(%ebp)\n" |
341 "convertdone:" | 295 "Lconvertdone:\n" |
342 "popa\n" | 296 "popa\n" |
343 "ret\n" | 297 "ret\n" |
344 ); | 298 ); |
345 | 299 |
| 300 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 301 const uint8* u_buf, |
| 302 const uint8* v_buf, |
| 303 uint8* rgb_buf, |
| 304 int width) { |
| 305 MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, |
| 306 &kCoefficientsRgbY[0][0]); |
| 307 } |
346 | 308 |
347 extern void ScaleYUVToRGB32Row(const uint8* y_buf, | 309 extern void MacScaleYUVToRGB32Row(const uint8* y_buf, |
348 const uint8* u_buf, | 310 const uint8* u_buf, |
349 const uint8* v_buf, | 311 const uint8* v_buf, |
350 uint8* rgb_buf, | 312 uint8* rgb_buf, |
351 int width, | 313 int width, |
352 int scaled_dx); | 314 int scaled_dx, |
| 315 int16 *kCoefficientsRgbY); |
353 | 316 |
354 __asm__( | 317 __asm__( |
355 " .globl _ScaleYUVToRGB32Row\n" | 318 " .globl _MacScaleYUVToRGB32Row\n" |
356 "_ScaleYUVToRGB32Row:\n" | 319 "_MacScaleYUVToRGB32Row:\n" |
357 "pusha\n" | 320 "pusha\n" |
358 "mov 0x24(%esp),%edx\n" | 321 "mov 0x24(%esp),%edx\n" |
359 "mov 0x28(%esp),%edi\n" | 322 "mov 0x28(%esp),%edi\n" |
360 "mov 0x2c(%esp),%esi\n" | 323 "mov 0x2c(%esp),%esi\n" |
361 "mov 0x30(%esp),%ebp\n" | 324 "mov 0x30(%esp),%ebp\n" |
362 "mov 0x34(%esp),%ecx\n" | 325 "mov 0x3c(%esp),%ecx\n" |
363 "xor %ebx,%ebx\n" | 326 "xor %ebx,%ebx\n" |
364 "jmp scaleend\n" | 327 "jmp Lscaleend\n" |
365 | 328 |
366 "scaleloop:" | 329 "Lscaleloop:" |
367 "mov %ebx,%eax\n" | 330 "mov %ebx,%eax\n" |
368 "sar $0x5,%eax\n" | 331 "sar $0x5,%eax\n" |
369 "movzbl (%edi,%eax,1),%eax\n" | 332 "movzbl (%edi,%eax,1),%eax\n" |
370 "movq kCoefficientsRgbU(,%eax,8),%mm0\n" | 333 "movq 2048(%ecx,%eax,8),%mm0\n" |
371 "mov %ebx,%eax\n" | 334 "mov %ebx,%eax\n" |
372 "sar $0x5,%eax\n" | 335 "sar $0x5,%eax\n" |
373 "movzbl (%esi,%eax,1),%eax\n" | 336 "movzbl (%esi,%eax,1),%eax\n" |
374 "paddsw kCoefficientsRgbV(,%eax,8),%mm0\n" | 337 "paddsw 4096(%ecx,%eax,8),%mm0\n" |
375 "mov %ebx,%eax\n" | 338 "mov %ebx,%eax\n" |
376 "add 0x38(%esp),%ebx\n" | 339 "add 0x38(%esp),%ebx\n" |
377 "sar $0x4,%eax\n" | 340 "sar $0x4,%eax\n" |
378 "movzbl (%edx,%eax,1),%eax\n" | 341 "movzbl (%edx,%eax,1),%eax\n" |
379 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" | 342 "movq 0(%ecx,%eax,8),%mm1\n" |
380 "mov %ebx,%eax\n" | 343 "mov %ebx,%eax\n" |
381 "add 0x38(%esp),%ebx\n" | 344 "add 0x38(%esp),%ebx\n" |
382 "sar $0x4,%eax\n" | 345 "sar $0x4,%eax\n" |
383 "movzbl (%edx,%eax,1),%eax\n" | 346 "movzbl (%edx,%eax,1),%eax\n" |
384 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" | 347 "movq 0(%ecx,%eax,8),%mm2\n" |
385 "paddsw %mm0,%mm1\n" | 348 "paddsw %mm0,%mm1\n" |
386 "paddsw %mm0,%mm2\n" | 349 "paddsw %mm0,%mm2\n" |
387 "psraw $0x6,%mm1\n" | 350 "psraw $0x6,%mm1\n" |
388 "psraw $0x6,%mm2\n" | 351 "psraw $0x6,%mm2\n" |
389 "packuswb %mm2,%mm1\n" | 352 "packuswb %mm2,%mm1\n" |
390 "movntq %mm1,0x0(%ebp)\n" | 353 "movntq %mm1,0x0(%ebp)\n" |
391 "add $0x8,%ebp\n" | 354 "add $0x8,%ebp\n" |
392 "scaleend:" | 355 "Lscaleend:" |
393 "sub $0x2,%ecx\n" | 356 "sub $0x2,0x34(%esp)\n" |
394 "jns scaleloop\n" | 357 "jns Lscaleloop\n" |
395 | 358 |
396 "and $0x1,%ecx\n" | 359 "and $0x1,0x34(%esp)\n" |
397 "je scaledone\n" | 360 "je Lscaledone\n" |
398 | 361 |
399 "mov %ebx,%eax\n" | 362 "mov %ebx,%eax\n" |
400 "sar $0x5,%eax\n" | 363 "sar $0x5,%eax\n" |
401 "movzbl (%edi,%eax,1),%eax\n" | 364 "movzbl (%edi,%eax,1),%eax\n" |
402 "movq kCoefficientsRgbU(,%eax,8),%mm0\n" | 365 "movq 2048(%ecx,%eax,8),%mm0\n" |
403 "mov %ebx,%eax\n" | 366 "mov %ebx,%eax\n" |
404 "sar $0x5,%eax\n" | 367 "sar $0x5,%eax\n" |
405 "movzbl (%esi,%eax,1),%eax\n" | 368 "movzbl (%esi,%eax,1),%eax\n" |
406 "paddsw kCoefficientsRgbV(,%eax,8),%mm0\n" | 369 "paddsw 4096(%ecx,%eax,8),%mm0\n" |
407 "mov %ebx,%eax\n" | 370 "mov %ebx,%eax\n" |
408 "sar $0x4,%eax\n" | 371 "sar $0x4,%eax\n" |
409 "movzbl (%edx,%eax,1),%eax\n" | 372 "movzbl (%edx,%eax,1),%eax\n" |
410 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" | 373 "movq 0(%ecx,%eax,8),%mm1\n" |
411 "mov %ebx,%eax\n" | 374 "mov %ebx,%eax\n" |
412 "sar $0x4,%eax\n" | 375 "sar $0x4,%eax\n" |
413 "movzbl (%edx,%eax,1),%eax\n" | 376 "movzbl (%edx,%eax,1),%eax\n" |
414 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" | 377 "movq 0(%ecx,%eax,8),%mm2\n" |
415 "paddsw %mm0,%mm1\n" | 378 "paddsw %mm0,%mm1\n" |
416 "paddsw %mm0,%mm2\n" | 379 "paddsw %mm0,%mm2\n" |
417 "psraw $0x6,%mm1\n" | 380 "psraw $0x6,%mm1\n" |
418 "psraw $0x6,%mm2\n" | 381 "psraw $0x6,%mm2\n" |
419 "packuswb %mm2,%mm1\n" | 382 "packuswb %mm2,%mm1\n" |
420 "movd %mm1,0x0(%ebp)\n" | 383 "movd %mm1,0x0(%ebp)\n" |
421 | 384 |
422 "scaledone:" | 385 "Lscaledone:" |
423 "popa\n" | 386 "popa\n" |
424 "ret\n" | 387 "ret\n" |
425 ); | 388 ); |
426 | 389 |
427 #else // USE_MMX | |
428 | 390 |
429 // Reference version of YUV converter. | |
430 static const int kClipTableSize = 256; | |
431 static const int kClipOverflow = 288; // Cb max is 535. | |
432 | |
433 static uint8 kRgbClipTable[kClipOverflow + | |
434 kClipTableSize + | |
435 kClipOverflow] = { | |
436 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values | |
437 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0. | |
438 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
448 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
449 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
450 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
451 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
452 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
453 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
454 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
455 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
456 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
457 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
458 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
459 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
460 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
461 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
462 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
463 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
464 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
465 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
466 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
467 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
468 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
469 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
471 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
472 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // Unclipped values. | |
473 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | |
474 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
475 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, | |
476 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, | |
477 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, | |
478 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, | |
479 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, | |
480 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, | |
481 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, | |
482 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, | |
483 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, | |
484 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | |
485 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | |
486 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | |
487 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, | |
488 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, | |
489 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, | |
490 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, | |
491 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, | |
492 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, | |
493 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, | |
494 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, | |
495 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, | |
496 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | |
497 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | |
498 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, | |
499 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | |
500 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, | |
501 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, | |
502 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, | |
503 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, | |
504 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 288 overflow values | |
505 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // clipped to 255. | |
506 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
507 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
508 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
509 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
510 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
511 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
512 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
513 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
514 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
515 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
516 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
517 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
518 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
519 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
520 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
521 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
522 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
523 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
524 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
525 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
526 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
527 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
528 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
529 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
530 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
531 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
532 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
533 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
534 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
535 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
536 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
537 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
538 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
539 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
540 }; | |
541 | |
542 // Clip an rgb channel value to 0..255 range. | |
543 // Source is signed fixed point 8.8. | |
544 // Table allows for values to underflow or overflow by 128. | |
545 // Therefore source range is -128 to 384. | |
546 // Output clips to unsigned 0 to 255. | |
547 static inline uint32 clip(int32 value) { | |
548 DCHECK(((value >> 8) + kClipOverflow) >= 0); | |
549 DCHECK(((value >> 8) + kClipOverflow) < | |
550 (kClipOverflow + kClipTableSize + kClipOverflow)); | |
551 return static_cast<uint32>(kRgbClipTable[((value) >> 8) + kClipOverflow]); | |
552 } | |
553 | |
554 static inline void YuvPixel(uint8 y, | |
555 uint8 u, | |
556 uint8 v, | |
557 uint8* rgb_buf) { | |
558 int32 d = static_cast<int32>(u) - 128; | |
559 int32 e = static_cast<int32>(v) - 128; | |
560 | |
561 int32 cb = (516 * d + 128); | |
562 int32 cg = (- 100 * d - 208 * e + 128); | |
563 int32 cr = (409 * e + 128); | |
564 | |
565 int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128); | |
566 *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) | | |
567 (clip(C298a + cg) << 8) | | |
568 (clip(C298a + cr) << 16) | | |
569 (0xff000000); | |
570 } | |
571 | |
572 void FastConvertYUVToRGB32Row(const uint8* y_buf, | |
573 const uint8* u_buf, | |
574 const uint8* v_buf, | |
575 uint8* rgb_buf, | |
576 int width) { | |
577 for (int x = 0; x < width; x += 2) { | |
578 uint8 u = u_buf[x >> 1]; | |
579 uint8 v = v_buf[x >> 1]; | |
580 uint8 y0 = y_buf[x]; | |
581 YuvPixel(y0, u, v, rgb_buf); | |
582 if ((x + 1) < width) { | |
583 uint8 y1 = y_buf[x + 1]; | |
584 YuvPixel(y1, u, v, rgb_buf + 4); | |
585 } | |
586 rgb_buf += 8; // Advance 2 pixels. | |
587 } | |
588 } | |
589 | |
590 // 28.4 fixed point is used. A shift by 4 isolates the integer. | |
591 // A shift by 5 is used to further subsample the chrominence channels. | |
592 // & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits, | |
593 // for 1/4 pixel accurate interpolation. | |
594 void ScaleYUVToRGB32Row(const uint8* y_buf, | 391 void ScaleYUVToRGB32Row(const uint8* y_buf, |
595 const uint8* u_buf, | 392 const uint8* u_buf, |
596 const uint8* v_buf, | 393 const uint8* v_buf, |
597 uint8* rgb_buf, | 394 uint8* rgb_buf, |
598 int width, | 395 int width, |
599 int scaled_dx) { | 396 int scaled_dx) { |
600 int scaled_x = 0; | 397 |
601 for (int x = 0; x < width; ++x) { | 398 MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, |
602 uint8 u = u_buf[scaled_x >> 5]; | 399 &kCoefficientsRgbY[0][0]); |
603 uint8 v = v_buf[scaled_x >> 5]; | |
604 uint8 y0 = y_buf[scaled_x >> 4]; | |
605 YuvPixel(y0, u, v, rgb_buf); | |
606 rgb_buf += 4; | |
607 scaled_x += scaled_dx; | |
608 } | |
609 } | 400 } |
610 #endif // USE_MMX | 401 |
611 } // extern "C" | 402 } // extern "C" |
612 | 403 |
OLD | NEW |