| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
| 6 | 6 |
| 7 // Enable bilinear filtering by turning on the following macro. | 7 // Enable bilinear filtering by turning on the following macro. |
| 8 // #define MEDIA_BILINEAR_FILTER 1 | 8 // #define MEDIA_BILINEAR_FILTER 1 |
| 9 | 9 |
| 10 namespace media { | 10 namespace media { |
| (...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 239 #undef RGBY | 239 #undef RGBY |
| 240 #undef RGBU | 240 #undef RGBU |
| 241 #undef RGBV | 241 #undef RGBV |
| 242 #undef MMX_ALIGNED | 242 #undef MMX_ALIGNED |
| 243 | 243 |
| 244 // Warning C4799: function has no EMMS instruction. | 244 // Warning C4799: function has no EMMS instruction. |
| 245 // EMMS() is slow and should be called by the calling function once per image. | 245 // EMMS() is slow and should be called by the calling function once per image. |
| 246 #pragma warning(disable: 4799) | 246 #pragma warning(disable: 4799) |
| 247 | 247 |
| 248 __declspec(naked) | 248 __declspec(naked) |
| 249 void ConvertYV12ToRGB32Row(const uint8* y_buf, | 249 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 250 const uint8* u_buf, | 250 const uint8* u_buf, |
| 251 const uint8* v_buf, | 251 const uint8* v_buf, |
| 252 uint8* rgb_buf, | 252 uint8* rgb_buf, |
| 253 int width) { | 253 int width) { |
| 254 __asm { | 254 __asm { |
| 255 pushad | 255 pushad |
| 256 mov edx, [esp + 32 + 4] // Y | 256 mov edx, [esp + 32 + 4] // Y |
| 257 mov edi, [esp + 32 + 8] // U | 257 mov edi, [esp + 32 + 8] // U |
| 258 mov esi, [esp + 32 + 12] // V | 258 mov esi, [esp + 32 + 12] // V |
| 259 mov ebp, [esp + 32 + 16] // rgb | 259 mov ebp, [esp + 32 + 16] // rgb |
| 260 mov ecx, [esp + 32 + 20] // width | 260 mov ecx, [esp + 32 + 20] // width |
| 261 shr ecx, 1 | 261 jmp wend |
| 262 | 262 |
| 263 wloop : | 263 wloop : |
| 264 movzx eax, byte ptr [edi] // NOLINT | 264 movzx eax, byte ptr [edi] |
| 265 add edi, 1 | 265 add edi, 1 |
| 266 movzx ebx, byte ptr [esi] // NOLINT | 266 movzx ebx, byte ptr [esi] |
| 267 add esi, 1 | 267 add esi, 1 |
| 268 movq mm0, [coefficients_RGB_U + 8 * eax] | 268 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 269 movzx eax, byte ptr [edx] // NOLINT | 269 movzx eax, byte ptr [edx] |
| 270 paddsw mm0, [coefficients_RGB_V + 8 * ebx] | 270 paddsw mm0, [coefficients_RGB_V + 8 * ebx] |
| 271 movzx ebx, byte ptr [edx + 1] // NOLINT | 271 movzx ebx, byte ptr [edx + 1] |
| 272 movq mm1, [coefficients_RGB_Y + 8 * eax] | 272 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 273 add edx, 2 | 273 add edx, 2 |
| 274 movq mm2, [coefficients_RGB_Y + 8 * ebx] | 274 movq mm2, [coefficients_RGB_Y + 8 * ebx] |
| 275 paddsw mm1, mm0 | 275 paddsw mm1, mm0 |
| 276 paddsw mm2, mm0 | 276 paddsw mm2, mm0 |
| 277 psraw mm1, 6 | 277 psraw mm1, 6 |
| 278 psraw mm2, 6 | 278 psraw mm2, 6 |
| 279 packuswb mm1, mm2 | 279 packuswb mm1, mm2 |
| 280 movntq [ebp], mm1 // NOLINT | 280 movntq [ebp], mm1 |
| 281 add ebp, 8 | 281 add ebp, 8 |
| 282 sub ecx, 1 | 282 wend : |
| 283 jnz wloop | 283 sub ecx, 2 |
| 284 jns wloop |
| 285 |
| 286 and ecx, 1 // odd number of pixels? |
| 287 jz wdone |
| 288 |
| 289 movzx eax, byte ptr [edi] |
| 290 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 291 movzx eax, byte ptr [esi] |
| 292 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 293 movzx eax, byte ptr [edx] |
| 294 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 295 paddsw mm1, mm0 |
| 296 psraw mm1, 6 |
| 297 packuswb mm1, mm1 |
| 298 movd [ebp], mm1 |
| 299 wdone : |
| 284 | 300 |
| 285 popad | 301 popad |
| 286 ret | 302 ret |
| 287 } | 303 } |
| 288 } | 304 } |
| 289 | 305 |
| 290 __declspec(naked) | 306 __declspec(naked) |
| 291 void HalfYV12ToRGB32Row(const uint8* y_buf, | 307 void ConvertYUVToRGB32Row(const uint8* y_buf, |
| 292 const uint8* u_buf, | 308 const uint8* u_buf, |
| 293 const uint8* v_buf, | 309 const uint8* v_buf, |
| 294 uint8* rgb_buf, | 310 uint8* rgb_buf, |
| 295 int width) { | 311 int width, |
| 312 int step) { |
| 296 __asm { | 313 __asm { |
| 297 pushad | 314 pushad |
| 298 mov edx, [esp + 32 + 4] // Y | 315 mov edx, [esp + 32 + 4] // Y |
| 299 mov edi, [esp + 32 + 8] // U | 316 mov edi, [esp + 32 + 8] // U |
| 300 mov esi, [esp + 32 + 12] // V | 317 mov esi, [esp + 32 + 12] // V |
| 301 mov ebp, [esp + 32 + 16] // rgb | 318 mov ebp, [esp + 32 + 16] // rgb |
| 302 mov ecx, [esp + 32 + 20] // width | 319 mov ecx, [esp + 32 + 20] // width |
| 320 mov ebx, [esp + 32 + 24] // step |
| 321 jmp wend |
| 303 | 322 |
| 304 wloop : | 323 wloop : |
| 305 movzx eax, byte ptr [edi] | 324 movzx eax, byte ptr [edi] |
| 306 add edi, 1 | 325 add edi, ebx |
| 307 movzx ebx, byte ptr [esi] | |
| 308 add esi, 1 | |
| 309 movq mm0, [coefficients_RGB_U + 8 * eax] | 326 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 327 movzx eax, byte ptr [esi] |
| 328 add esi, ebx |
| 329 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 310 movzx eax, byte ptr [edx] | 330 movzx eax, byte ptr [edx] |
| 311 paddsw mm0, [coefficients_RGB_V + 8 * ebx] | 331 add edx, ebx |
| 312 #if MEDIA_BILINEAR_FILTER | 332 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 313 movzx ebx, byte ptr [edx + 1] | 333 movzx eax, byte ptr [edx] |
| 314 add ebx, eax | 334 add edx, ebx |
| 315 shr ebx, 1 | 335 movq mm2, [coefficients_RGB_Y + 8 * eax] |
| 316 #endif | 336 paddsw mm1, mm0 |
| 317 paddsw mm0, [coefficients_RGB_Y + 8 * eax] | 337 paddsw mm2, mm0 |
| 318 add edx, 2 | 338 psraw mm1, 6 |
| 319 psraw mm0, 6 | 339 psraw mm2, 6 |
| 320 packuswb mm0, mm0 | 340 packuswb mm1, mm2 |
| 321 movd [ebp], mm0 | 341 movntq [ebp], mm1 |
| 322 add ebp, 4 | 342 add ebp, 8 |
| 323 sub ecx, 1 | 343 wend : |
| 324 jnz wloop | 344 sub ecx, 2 |
| 345 jns wloop |
| 346 |
| 347 and ecx, 1 // odd number of pixels? |
| 348 jz wdone |
| 349 |
| 350 movzx eax, byte ptr [edi] |
| 351 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 352 movzx eax, byte ptr [esi] |
| 353 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 354 movzx eax, byte ptr [edx] |
| 355 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 356 paddsw mm1, mm0 |
| 357 psraw mm1, 6 |
| 358 packuswb mm1, mm1 |
| 359 movd [ebp], mm1 |
| 360 wdone : |
| 325 | 361 |
| 326 popad | 362 popad |
| 327 ret | 363 ret |
| 328 } | 364 } |
| 329 } | 365 } |
| 330 | 366 |
| 331 __declspec(naked) | 367 __declspec(naked) |
| 332 void ScaleYV12ToRGB32Row(const uint8* y_buf, | 368 void RotateConvertYUVToRGB32Row(const uint8* y_buf, |
| 369 const uint8* u_buf, |
| 370 const uint8* v_buf, |
| 371 uint8* rgb_buf, |
| 372 int width, |
| 373 int ystep, |
| 374 int uvstep) { |
| 375 __asm { |
| 376 pushad |
| 377 mov edx, [esp + 32 + 4] // Y |
| 378 mov edi, [esp + 32 + 8] // U |
| 379 mov esi, [esp + 32 + 12] // V |
| 380 mov ebp, [esp + 32 + 16] // rgb |
| 381 mov ecx, [esp + 32 + 20] // width |
| 382 jmp wend |
| 383 |
| 384 wloop : |
| 385 movzx eax, byte ptr [edi] |
| 386 mov ebx, [esp + 32 + 28] // uvstep |
| 387 add edi, ebx |
| 388 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 389 movzx eax, byte ptr [esi] |
| 390 add esi, ebx |
| 391 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 392 movzx eax, byte ptr [edx] |
| 393 mov ebx, [esp + 32 + 24] // ystep |
| 394 add edx, ebx |
| 395 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 396 movzx eax, byte ptr [edx] |
| 397 add edx, ebx |
| 398 movq mm2, [coefficients_RGB_Y + 8 * eax] |
| 399 paddsw mm1, mm0 |
| 400 paddsw mm2, mm0 |
| 401 psraw mm1, 6 |
| 402 psraw mm2, 6 |
| 403 packuswb mm1, mm2 |
| 404 movntq [ebp], mm1 |
| 405 add ebp, 8 |
| 406 wend : |
| 407 sub ecx, 2 |
| 408 jns wloop |
| 409 |
| 410 and ecx, 1 // odd number of pixels? |
| 411 jz wdone |
| 412 |
| 413 movzx eax, byte ptr [edi] |
| 414 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 415 movzx eax, byte ptr [esi] |
| 416 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 417 movzx eax, byte ptr [edx] |
| 418 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 419 paddsw mm1, mm0 |
| 420 psraw mm1, 6 |
| 421 packuswb mm1, mm1 |
| 422 movd [ebp], mm1 |
| 423 wdone : |
| 424 |
| 425 popad |
| 426 ret |
| 427 } |
| 428 } |
| 429 |
| 430 __declspec(naked) |
| 431 void DoubleYUVToRGB32Row(const uint8* y_buf, |
| 333 const uint8* u_buf, | 432 const uint8* u_buf, |
| 334 const uint8* v_buf, | 433 const uint8* v_buf, |
| 335 uint8* rgb_buf, | 434 uint8* rgb_buf, |
| 336 int width, | 435 int width) { |
| 337 int dx) { | |
| 338 __asm { | 436 __asm { |
| 339 pushad | 437 pushad |
| 340 mov edx, [esp + 32 + 4] // Y | 438 mov edx, [esp + 32 + 4] // Y |
| 341 mov edi, [esp + 32 + 8] // U | 439 mov edi, [esp + 32 + 8] // U |
| 342 mov esi, [esp + 32 + 12] // V | 440 mov esi, [esp + 32 + 12] // V |
| 343 mov ebp, [esp + 32 + 16] // rgb | 441 mov ebp, [esp + 32 + 16] // rgb |
| 344 mov ecx, [esp + 32 + 20] // width | 442 mov ecx, [esp + 32 + 20] // width |
| 345 xor eax, eax // x | 443 jmp wend |
| 346 | 444 |
| 347 wloop : | 445 wloop : |
| 348 mov ebx, eax | 446 movzx eax, byte ptr [edi] |
| 349 sar ebx, 5 | 447 add edi, 1 |
| 350 movzx ebx, byte ptr [edi + ebx] | 448 movzx ebx, byte ptr [esi] |
| 351 movq mm0, [coefficients_RGB_U + 8 * ebx] | 449 add esi, 1 |
| 352 mov ebx, eax | 450 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 353 sar ebx, 5 | 451 movzx eax, byte ptr [edx] |
| 354 movzx ebx, byte ptr [esi + ebx] | |
| 355 paddsw mm0, [coefficients_RGB_V + 8 * ebx] | 452 paddsw mm0, [coefficients_RGB_V + 8 * ebx] |
| 356 mov ebx, eax | 453 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 357 sar ebx, 4 | 454 paddsw mm1, mm0 |
| 358 movzx ebx, byte ptr [edx + ebx] | 455 psraw mm1, 6 |
| 456 packuswb mm1, mm1 |
| 457 punpckldq mm1, mm1 |
| 458 movntq [ebp], mm1 |
| 459 |
| 460 movzx ebx, byte ptr [edx + 1] |
| 461 add edx, 2 |
| 359 paddsw mm0, [coefficients_RGB_Y + 8 * ebx] | 462 paddsw mm0, [coefficients_RGB_Y + 8 * ebx] |
| 360 psraw mm0, 6 | 463 psraw mm0, 6 |
| 361 packuswb mm0, mm0 | 464 packuswb mm0, mm0 |
| 362 movd [ebp], mm0 | 465 punpckldq mm0, mm0 |
| 466 movntq [ebp+8], mm0 |
| 467 add ebp, 16 |
| 468 wend : |
| 469 sub ecx, 4 |
| 470 jns wloop |
| 471 |
| 472 add ecx, 4 |
| 473 jz wdone |
| 474 |
| 475 movzx eax, byte ptr [edi] |
| 476 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 477 movzx eax, byte ptr [esi] |
| 478 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 479 movzx eax, byte ptr [edx] |
| 480 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 481 paddsw mm1, mm0 |
| 482 psraw mm1, 6 |
| 483 packuswb mm1, mm1 |
| 484 jmp wend1 |
| 485 |
| 486 wloop1 : |
| 487 movd [ebp], mm1 |
| 363 add ebp, 4 | 488 add ebp, 4 |
| 364 add eax, [esp + 32 + 24] // x += dx | 489 wend1 : |
| 365 sub ecx, 1 | 490 sub ecx, 1 |
| 366 jnz wloop | 491 jns wloop1 |
| 367 | 492 wdone : |
| 368 popad | 493 popad |
| 369 ret | 494 ret |
| 370 } | 495 } |
| 371 } | 496 } |
| 372 | 497 |
| 373 | 498 // This version does general purpose scaling by any amount, up or down. |
| 499 // The only thing it cannot do is rotation by 90 or 270 degrees. |
| 500 // For performance the chroma is undersampled, reducing the cost of a 3x |
| 501 // 1080p scale from 8.4 ms to 5.4 ms. |
| 374 __declspec(naked) | 502 __declspec(naked) |
| 375 void Half2Row(const uint8* in_row0, | 503 void ScaleYUVToRGB32Row(const uint8* y_buf, |
| 376 const uint8* in_row1, | 504 const uint8* u_buf, |
| 377 uint8* out_row, | 505 const uint8* v_buf, |
| 378 int out_width) { | 506 uint8* rgb_buf, |
| 507 int width, |
| 508 int dx) { |
| 379 __asm { | 509 __asm { |
| 380 pushad | 510 pushad |
| 381 mov esi, [esp + 32 + 4] // row0 | 511 mov edx, [esp + 32 + 4] // Y |
| 382 mov ebx, [esp + 32 + 8] // row1 | 512 mov edi, [esp + 32 + 8] // U |
| 383 mov edi, [esp + 32 + 12] // out | 513 mov esi, [esp + 32 + 12] // V |
| 384 mov ecx, [esp + 32 + 16] // width | 514 mov ebp, [esp + 32 + 16] // rgb |
| 515 mov ecx, [esp + 32 + 20] // width |
| 516 xor ebx, ebx // x |
| 517 jmp wend |
| 385 | 518 |
| 386 wloop : | 519 wloop : |
| 387 movzx eax, byte ptr [esi] | 520 mov eax, ebx |
| 388 movzx edx, byte ptr [esi+1] | 521 sar eax, 5 |
| 389 add esi, 2 | 522 movzx eax, byte ptr [edi + eax] |
| 390 add eax, edx | 523 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 391 movzx edx, byte ptr [ebx] | 524 mov eax, ebx |
| 392 add eax, edx | 525 sar eax, 5 |
| 393 movzx edx, byte ptr [ebx+1] | 526 movzx eax, byte ptr [esi + eax] |
| 394 add eax, edx | 527 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 395 add ebx, 2 | 528 mov eax, ebx |
| 396 shr eax, 2 | 529 add ebx, [esp + 32 + 24] // x += dx |
| 397 mov [edi], al | 530 sar eax, 4 |
| 398 add edi, 1 | 531 movzx eax, byte ptr [edx + eax] |
| 399 sub ecx, 1 | 532 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 400 jnz wloop | 533 mov eax, ebx |
| 534 add ebx, [esp + 32 + 24] // x += dx |
| 535 sar eax, 4 |
| 536 movzx eax, byte ptr [edx + eax] |
| 537 movq mm2, [coefficients_RGB_Y + 8 * eax] |
| 538 paddsw mm1, mm0 |
| 539 paddsw mm2, mm0 |
| 540 psraw mm1, 6 |
| 541 psraw mm2, 6 |
| 542 packuswb mm1, mm2 |
| 543 movntq [ebp], mm1 |
| 544 add ebp, 8 |
| 545 wend : |
| 546 sub ecx, 2 |
| 547 jns wloop |
| 548 |
| 549 and ecx, 1 // odd number of pixels? |
| 550 jz wdone |
| 551 |
| 552 mov eax, ebx |
| 553 sar eax, 5 |
| 554 movzx eax, byte ptr [edi + eax] |
| 555 movq mm0, [coefficients_RGB_U + 8 * eax] |
| 556 mov eax, ebx |
| 557 sar eax, 5 |
| 558 movzx eax, byte ptr [esi + eax] |
| 559 paddsw mm0, [coefficients_RGB_V + 8 * eax] |
| 560 mov eax, ebx |
| 561 sar eax, 4 |
| 562 movzx eax, byte ptr [edx + eax] |
| 563 movq mm1, [coefficients_RGB_Y + 8 * eax] |
| 564 mov eax, ebx |
| 565 sar eax, 4 |
| 566 movzx eax, byte ptr [edx + eax] |
| 567 movq mm2, [coefficients_RGB_Y + 8 * eax] |
| 568 paddsw mm1, mm0 |
| 569 paddsw mm2, mm0 |
| 570 psraw mm1, 6 |
| 571 psraw mm2, 6 |
| 572 packuswb mm1, mm2 |
| 573 movd [ebp], mm1 |
| 574 |
| 575 wdone : |
| 401 | 576 |
| 402 popad | 577 popad |
| 403 ret | 578 ret |
| 404 } | 579 } |
| 405 } | 580 } |
| 406 | 581 |
| 407 } // namespace media | 582 } // namespace media |
| 408 | 583 |
| OLD | NEW |