| Index: media/base/yuv_row_win.cc
|
| ===================================================================
|
| --- media/base/yuv_row_win.cc (revision 16299)
|
| +++ media/base/yuv_row_win.cc (working copy)
|
| @@ -246,11 +246,11 @@
|
| #pragma warning(disable: 4799)
|
|
|
| __declspec(naked)
|
| -void ConvertYV12ToRGB32Row(const uint8* y_buf,
|
| - const uint8* u_buf,
|
| - const uint8* v_buf,
|
| - uint8* rgb_buf,
|
| - int width) {
|
| +void FastConvertYUVToRGB32Row(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* rgb_buf,
|
| + int width) {
|
| __asm {
|
| pushad
|
| mov edx, [esp + 32 + 4] // Y
|
| @@ -258,17 +258,17 @@
|
| mov esi, [esp + 32 + 12] // V
|
| mov ebp, [esp + 32 + 16] // rgb
|
| mov ecx, [esp + 32 + 20] // width
|
| - shr ecx, 1
|
| + jmp wend
|
|
|
| wloop :
|
| - movzx eax, byte ptr [edi] // NOLINT
|
| + movzx eax, byte ptr [edi]
|
| add edi, 1
|
| - movzx ebx, byte ptr [esi] // NOLINT
|
| + movzx ebx, byte ptr [esi]
|
| add esi, 1
|
| movq mm0, [coefficients_RGB_U + 8 * eax]
|
| - movzx eax, byte ptr [edx] // NOLINT
|
| + movzx eax, byte ptr [edx]
|
| paddsw mm0, [coefficients_RGB_V + 8 * ebx]
|
| - movzx ebx, byte ptr [edx + 1] // NOLINT
|
| + movzx ebx, byte ptr [edx + 1]
|
| movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| add edx, 2
|
| movq mm2, [coefficients_RGB_Y + 8 * ebx]
|
| @@ -277,22 +277,39 @@
|
| psraw mm1, 6
|
| psraw mm2, 6
|
| packuswb mm1, mm2
|
| - movntq [ebp], mm1 // NOLINT
|
| + movntq [ebp], mm1
|
| add ebp, 8
|
| - sub ecx, 1
|
| - jnz wloop
|
| + wend :
|
| + sub ecx, 2
|
| + jns wloop
|
|
|
| + and ecx, 1 // odd number of pixels?
|
| + jz wdone
|
| +
|
| + movzx eax, byte ptr [edi]
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + movzx eax, byte ptr [esi]
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + psraw mm1, 6
|
| + packuswb mm1, mm1
|
| + movd [ebp], mm1
|
| + wdone :
|
| +
|
| popad
|
| ret
|
| }
|
| }
|
|
|
| __declspec(naked)
|
| -void HalfYV12ToRGB32Row(const uint8* y_buf,
|
| - const uint8* u_buf,
|
| - const uint8* v_buf,
|
| - uint8* rgb_buf,
|
| - int width) {
|
| +void ConvertYUVToRGB32Row(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* rgb_buf,
|
| + int width,
|
| + int step) {
|
| __asm {
|
| pushad
|
| mov edx, [esp + 32 + 4] // Y
|
| @@ -300,41 +317,122 @@
|
| mov esi, [esp + 32 + 12] // V
|
| mov ebp, [esp + 32 + 16] // rgb
|
| mov ecx, [esp + 32 + 20] // width
|
| + mov ebx, [esp + 32 + 24] // step
|
| + jmp wend
|
|
|
| wloop :
|
| movzx eax, byte ptr [edi]
|
| - add edi, 1
|
| - movzx ebx, byte ptr [esi]
|
| - add esi, 1
|
| + add edi, ebx
|
| movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + movzx eax, byte ptr [esi]
|
| + add esi, ebx
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| movzx eax, byte ptr [edx]
|
| - paddsw mm0, [coefficients_RGB_V + 8 * ebx]
|
| -#if MEDIA_BILINEAR_FILTER
|
| - movzx ebx, byte ptr [edx + 1]
|
| - add ebx, eax
|
| - shr ebx, 1
|
| -#endif
|
| - paddsw mm0, [coefficients_RGB_Y + 8 * eax]
|
| - add edx, 2
|
| - psraw mm0, 6
|
| - packuswb mm0, mm0
|
| - movd [ebp], mm0
|
| - add ebp, 4
|
| - sub ecx, 1
|
| - jnz wloop
|
| + add edx, ebx
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| + add edx, ebx
|
| + movq mm2, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + paddsw mm2, mm0
|
| + psraw mm1, 6
|
| + psraw mm2, 6
|
| + packuswb mm1, mm2
|
| + movntq [ebp], mm1
|
| + add ebp, 8
|
| + wend :
|
| + sub ecx, 2
|
| + jns wloop
|
|
|
| + and ecx, 1 // odd number of pixels?
|
| + jz wdone
|
| +
|
| + movzx eax, byte ptr [edi]
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + movzx eax, byte ptr [esi]
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + psraw mm1, 6
|
| + packuswb mm1, mm1
|
| + movd [ebp], mm1
|
| + wdone :
|
| +
|
| popad
|
| ret
|
| }
|
| }
|
|
|
| __declspec(naked)
|
| -void ScaleYV12ToRGB32Row(const uint8* y_buf,
|
| +void RotateConvertYUVToRGB32Row(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* rgb_buf,
|
| + int width,
|
| + int ystep,
|
| + int uvstep) {
|
| + __asm {
|
| + pushad
|
| + mov edx, [esp + 32 + 4] // Y
|
| + mov edi, [esp + 32 + 8] // U
|
| + mov esi, [esp + 32 + 12] // V
|
| + mov ebp, [esp + 32 + 16] // rgb
|
| + mov ecx, [esp + 32 + 20] // width
|
| + jmp wend
|
| +
|
| + wloop :
|
| + movzx eax, byte ptr [edi]
|
| + mov ebx, [esp + 32 + 28] // uvstep
|
| + add edi, ebx
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + movzx eax, byte ptr [esi]
|
| + add esi, ebx
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| + mov ebx, [esp + 32 + 24] // ystep
|
| + add edx, ebx
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| + add edx, ebx
|
| + movq mm2, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + paddsw mm2, mm0
|
| + psraw mm1, 6
|
| + psraw mm2, 6
|
| + packuswb mm1, mm2
|
| + movntq [ebp], mm1
|
| + add ebp, 8
|
| + wend :
|
| + sub ecx, 2
|
| + jns wloop
|
| +
|
| + and ecx, 1 // odd number of pixels?
|
| + jz wdone
|
| +
|
| + movzx eax, byte ptr [edi]
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + movzx eax, byte ptr [esi]
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + psraw mm1, 6
|
| + packuswb mm1, mm1
|
| + movd [ebp], mm1
|
| + wdone :
|
| +
|
| + popad
|
| + ret
|
| + }
|
| +}
|
| +
|
| +__declspec(naked)
|
| +void DoubleYUVToRGB32Row(const uint8* y_buf,
|
| const uint8* u_buf,
|
| const uint8* v_buf,
|
| uint8* rgb_buf,
|
| - int width,
|
| - int dx) {
|
| + int width) {
|
| __asm {
|
| pushad
|
| mov edx, [esp + 32 + 4] // Y
|
| @@ -342,63 +440,140 @@
|
| mov esi, [esp + 32 + 12] // V
|
| mov ebp, [esp + 32 + 16] // rgb
|
| mov ecx, [esp + 32 + 20] // width
|
| - xor eax, eax // x
|
| + jmp wend
|
|
|
| wloop :
|
| - mov ebx, eax
|
| - sar ebx, 5
|
| - movzx ebx, byte ptr [edi + ebx]
|
| - movq mm0, [coefficients_RGB_U + 8 * ebx]
|
| - mov ebx, eax
|
| - sar ebx, 5
|
| - movzx ebx, byte ptr [esi + ebx]
|
| + movzx eax, byte ptr [edi]
|
| + add edi, 1
|
| + movzx ebx, byte ptr [esi]
|
| + add esi, 1
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| paddsw mm0, [coefficients_RGB_V + 8 * ebx]
|
| - mov ebx, eax
|
| - sar ebx, 4
|
| - movzx ebx, byte ptr [edx + ebx]
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + psraw mm1, 6
|
| + packuswb mm1, mm1
|
| + punpckldq mm1, mm1
|
| + movntq [ebp], mm1
|
| +
|
| + movzx ebx, byte ptr [edx + 1]
|
| + add edx, 2
|
| paddsw mm0, [coefficients_RGB_Y + 8 * ebx]
|
| psraw mm0, 6
|
| packuswb mm0, mm0
|
| - movd [ebp], mm0
|
| + punpckldq mm0, mm0
|
| + movntq [ebp+8], mm0
|
| + add ebp, 16
|
| + wend :
|
| + sub ecx, 4
|
| + jns wloop
|
| +
|
| + add ecx, 4
|
| + jz wdone
|
| +
|
| + movzx eax, byte ptr [edi]
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + movzx eax, byte ptr [esi]
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| + movzx eax, byte ptr [edx]
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + psraw mm1, 6
|
| + packuswb mm1, mm1
|
| + jmp wend1
|
| +
|
| + wloop1 :
|
| + movd [ebp], mm1
|
| add ebp, 4
|
| - add eax, [esp + 32 + 24] // x += dx
|
| + wend1 :
|
| sub ecx, 1
|
| - jnz wloop
|
| -
|
| + jns wloop1
|
| + wdone :
|
| popad
|
| ret
|
| }
|
| }
|
|
|
| -
|
| +// This version does general purpose scaling by any amount, up or down.
|
| +// The only thing it can not do it rotation by 90 or 270.
|
| +// For performance the chroma is under sampled, reducing cost of a 3x
|
| +// 1080p scale from 8.4 ms to 5.4 ms.
|
| __declspec(naked)
|
| -void Half2Row(const uint8* in_row0,
|
| - const uint8* in_row1,
|
| - uint8* out_row,
|
| - int out_width) {
|
| +void ScaleYUVToRGB32Row(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* rgb_buf,
|
| + int width,
|
| + int dx) {
|
| __asm {
|
| pushad
|
| - mov esi, [esp + 32 + 4] // row0
|
| - mov ebx, [esp + 32 + 8] // row1
|
| - mov edi, [esp + 32 + 12] // out
|
| - mov ecx, [esp + 32 + 16] // width
|
| + mov edx, [esp + 32 + 4] // Y
|
| + mov edi, [esp + 32 + 8] // U
|
| + mov esi, [esp + 32 + 12] // V
|
| + mov ebp, [esp + 32 + 16] // rgb
|
| + mov ecx, [esp + 32 + 20] // width
|
| + xor ebx, ebx // x
|
| + jmp wend
|
|
|
| wloop :
|
| - movzx eax, byte ptr [esi]
|
| - movzx edx, byte ptr [esi+1]
|
| - add esi, 2
|
| - add eax, edx
|
| - movzx edx, byte ptr [ebx]
|
| - add eax, edx
|
| - movzx edx, byte ptr [ebx+1]
|
| - add eax, edx
|
| - add ebx, 2
|
| - shr eax, 2
|
| - mov [edi], al
|
| - add edi, 1
|
| - sub ecx, 1
|
| - jnz wloop
|
| + mov eax, ebx
|
| + sar eax, 5
|
| + movzx eax, byte ptr [edi + eax]
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + mov eax, ebx
|
| + sar eax, 5
|
| + movzx eax, byte ptr [esi + eax]
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| + mov eax, ebx
|
| + add ebx, [esp + 32 + 24] // x += dx
|
| + sar eax, 4
|
| + movzx eax, byte ptr [edx + eax]
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + mov eax, ebx
|
| + add ebx, [esp + 32 + 24] // x += dx
|
| + sar eax, 4
|
| + movzx eax, byte ptr [edx + eax]
|
| + movq mm2, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + paddsw mm2, mm0
|
| + psraw mm1, 6
|
| + psraw mm2, 6
|
| + packuswb mm1, mm2
|
| + movntq [ebp], mm1
|
| + add ebp, 8
|
| + wend :
|
| + sub ecx, 2
|
| + jns wloop
|
|
|
| + and ecx, 1 // odd number of pixels?
|
| + jz wdone
|
| +
|
| + mov eax, ebx
|
| + sar eax, 5
|
| + movzx eax, byte ptr [edi + eax]
|
| + movq mm0, [coefficients_RGB_U + 8 * eax]
|
| + mov eax, ebx
|
| + sar eax, 5
|
| + movzx eax, byte ptr [esi + eax]
|
| + paddsw mm0, [coefficients_RGB_V + 8 * eax]
|
| + mov eax, ebx
|
| + sar eax, 4
|
| + movzx eax, byte ptr [edx + eax]
|
| + movq mm1, [coefficients_RGB_Y + 8 * eax]
|
| + mov eax, ebx
|
| + sar eax, 4
|
| + movzx eax, byte ptr [edx + eax]
|
| + movq mm2, [coefficients_RGB_Y + 8 * eax]
|
| + paddsw mm1, mm0
|
| + paddsw mm2, mm0
|
| + psraw mm1, 6
|
| + psraw mm2, 6
|
| + packuswb mm1, mm2
|
| + movd [ebp], mm1
|
| +
|
| + wdone :
|
| +
|
| popad
|
| ret
|
| }
|
|
|