| Index: media/base/yuv_row_win.cc
 | 
| ===================================================================
 | 
| --- media/base/yuv_row_win.cc	(revision 16299)
 | 
| +++ media/base/yuv_row_win.cc	(working copy)
 | 
| @@ -246,11 +246,11 @@
 | 
|  #pragma warning(disable: 4799)
 | 
|  
 | 
|  __declspec(naked)
 | 
| -void ConvertYV12ToRGB32Row(const uint8* y_buf,
 | 
| -                           const uint8* u_buf,
 | 
| -                           const uint8* v_buf,
 | 
| -                           uint8* rgb_buf,
 | 
| -                           int width) {
 | 
| +void FastConvertYUVToRGB32Row(const uint8* y_buf,
 | 
| +                              const uint8* u_buf,
 | 
| +                              const uint8* v_buf,
 | 
| +                              uint8* rgb_buf,
 | 
| +                              int width) {
 | 
|    __asm {
 | 
|      pushad
 | 
|      mov       edx, [esp + 32 + 4]   // Y
 | 
| @@ -258,17 +258,17 @@
 | 
|      mov       esi, [esp + 32 + 12]  // V
 | 
|      mov       ebp, [esp + 32 + 16]  // rgb
 | 
|      mov       ecx, [esp + 32 + 20]  // width
 | 
| -    shr       ecx, 1
 | 
| +    jmp       wend
 | 
|  
 | 
|   wloop :
 | 
| -    movzx     eax, byte ptr [edi]  // NOLINT
 | 
| +    movzx     eax, byte ptr [edi]
 | 
|      add       edi, 1
 | 
| -    movzx     ebx, byte ptr [esi]  // NOLINT
 | 
| +    movzx     ebx, byte ptr [esi]
 | 
|      add       esi, 1
 | 
|      movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| -    movzx     eax, byte ptr [edx]  // NOLINT
 | 
| +    movzx     eax, byte ptr [edx]
 | 
|      paddsw    mm0, [coefficients_RGB_V + 8 * ebx]
 | 
| -    movzx     ebx, byte ptr [edx + 1]  // NOLINT
 | 
| +    movzx     ebx, byte ptr [edx + 1]
 | 
|      movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
|      add       edx, 2
 | 
|      movq      mm2, [coefficients_RGB_Y + 8 * ebx]
 | 
| @@ -277,22 +277,39 @@
 | 
|      psraw     mm1, 6
 | 
|      psraw     mm2, 6
 | 
|      packuswb  mm1, mm2
 | 
| -    movntq    [ebp], mm1  // NOLINT
 | 
| +    movntq    [ebp], mm1
 | 
|      add       ebp, 8
 | 
| -    sub       ecx, 1
 | 
| -    jnz       wloop
 | 
| + wend :
 | 
| +    sub       ecx, 2
 | 
| +    jns       wloop
 | 
|  
 | 
| +    and       ecx, 1  // odd number of pixels?
 | 
| +    jz        wdone
 | 
| +
 | 
| +    movzx     eax, byte ptr [edi]
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| +    movzx     eax, byte ptr [esi]
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]
 | 
| +    movzx     eax, byte ptr [edx]
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
| +    paddsw    mm1, mm0
 | 
| +    psraw     mm1, 6
 | 
| +    packuswb  mm1, mm1
 | 
| +    movd      [ebp], mm1
 | 
| + wdone :
 | 
| +
 | 
|      popad
 | 
|      ret
 | 
|    }
 | 
|  }
 | 
|  
 | 
|  __declspec(naked)
 | 
| -void HalfYV12ToRGB32Row(const uint8* y_buf,
 | 
| -                        const uint8* u_buf,
 | 
| -                        const uint8* v_buf,
 | 
| -                        uint8* rgb_buf,
 | 
| -                        int width) {
 | 
| +void ConvertYUVToRGB32Row(const uint8* y_buf,
 | 
| +                          const uint8* u_buf,
 | 
| +                          const uint8* v_buf,
 | 
| +                          uint8* rgb_buf,
 | 
| +                          int width,
 | 
| +                          int step) {
 | 
|    __asm {
 | 
|      pushad
 | 
|      mov       edx, [esp + 32 + 4]   // Y
 | 
| @@ -300,41 +317,122 @@
 | 
|      mov       esi, [esp + 32 + 12]  // V
 | 
|      mov       ebp, [esp + 32 + 16]  // rgb
 | 
|      mov       ecx, [esp + 32 + 20]  // width
 | 
| +    mov       ebx, [esp + 32 + 24]  // step
 | 
| +    jmp       wend
 | 
|  
 | 
|   wloop :
 | 
|      movzx     eax, byte ptr [edi]
 | 
| -    add       edi, 1
 | 
| -    movzx     ebx, byte ptr [esi]
 | 
| -    add       esi, 1
 | 
| +    add       edi, ebx
 | 
|      movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| +    movzx     eax, byte ptr [esi]
 | 
| +    add       esi, ebx
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]
 | 
|      movzx     eax, byte ptr [edx]
 | 
| -    paddsw    mm0, [coefficients_RGB_V + 8 * ebx]
 | 
| -#if MEDIA_BILINEAR_FILTER
 | 
| -    movzx     ebx, byte ptr [edx + 1]
 | 
| -    add       ebx, eax
 | 
| -    shr       ebx, 1
 | 
| -#endif
 | 
| -    paddsw    mm0, [coefficients_RGB_Y + 8 * eax]
 | 
| -    add       edx, 2
 | 
| -    psraw     mm0, 6
 | 
| -    packuswb  mm0, mm0
 | 
| -    movd      [ebp], mm0
 | 
| -    add       ebp, 4
 | 
| -    sub       ecx, 1
 | 
| -    jnz       wloop
 | 
| +    add       edx, ebx
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
| +    movzx     eax, byte ptr [edx]
 | 
| +    add       edx, ebx
 | 
| +    movq      mm2, [coefficients_RGB_Y + 8 * eax]
 | 
| +    paddsw    mm1, mm0
 | 
| +    paddsw    mm2, mm0
 | 
| +    psraw     mm1, 6
 | 
| +    psraw     mm2, 6
 | 
| +    packuswb  mm1, mm2
 | 
| +    movntq    [ebp], mm1
 | 
| +    add       ebp, 8
 | 
| + wend :
 | 
| +    sub       ecx, 2
 | 
| +    jns       wloop
 | 
|  
 | 
| +    and       ecx, 1  // odd number of pixels?
 | 
| +    jz        wdone
 | 
| +
 | 
| +    movzx     eax, byte ptr [edi]
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| +    movzx     eax, byte ptr [esi]
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]
 | 
| +    movzx     eax, byte ptr [edx]
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
| +    paddsw    mm1, mm0
 | 
| +    psraw     mm1, 6
 | 
| +    packuswb  mm1, mm1
 | 
| +    movd      [ebp], mm1
 | 
| + wdone :
 | 
| +
 | 
|      popad
 | 
|      ret
 | 
|    }
 | 
|  }
 | 
|  
 | 
|  __declspec(naked)
 | 
| -void ScaleYV12ToRGB32Row(const uint8* y_buf,
 | 
| +void RotateConvertYUVToRGB32Row(const uint8* y_buf,  // Y samples; pointer advances by ystep per pixel
 | 
| +                                const uint8* u_buf,  // U samples; pointer advances by uvstep per pixel pair
 | 
| +                                const uint8* v_buf,  // V samples; pointer advances by uvstep per pixel pair
 | 
| +                                uint8* rgb_buf,  // output; 4 bytes are written per pixel
 | 
| +                                int width,  // number of output pixels; an odd count is finished by the tail below
 | 
| +                                int ystep,  // byte increment applied to the Y pointer after each Y read
 | 
| +                                int uvstep) {  // byte increment applied to the U and V pointers after each read
 | 
| +  __asm {
 | 
| +    pushad  // saves 8 registers (32 bytes), hence the "+ 32" in the argument offsets below
 | 
| +    mov       edx, [esp + 32 + 4]   // edx = y_buf
 | 
| +    mov       edi, [esp + 32 + 8]   // edi = u_buf
 | 
| +    mov       esi, [esp + 32 + 12]  // esi = v_buf
 | 
| +    mov       ebp, [esp + 32 + 16]  // ebp = rgb_buf
 | 
| +    mov       ecx, [esp + 32 + 20]  // ecx = width (remaining pixel count)
 | 
| +    jmp       wend  // test the count first so width < 2 skips the two-pixel loop
 | 
| +
 | 
| + wloop :  // two pixels per iteration, sharing one U/V sample
 | 
| +    movzx     eax, byte ptr [edi]  // eax = current U byte
 | 
| +    mov       ebx, [esp + 32 + 28]  // ebx = uvstep (reloaded each pass; ebx is reused for ystep below)
 | 
| +    add       edi, ebx  // U pointer += uvstep
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]  // mm0 = U table entry (8 bytes per entry)
 | 
| +    movzx     eax, byte ptr [esi]  // eax = current V byte
 | 
| +    add       esi, ebx  // V pointer += uvstep
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]  // mm0 += V table entry (signed saturating 16-bit adds)
 | 
| +    movzx     eax, byte ptr [edx]  // eax = first Y byte
 | 
| +    mov       ebx, [esp + 32 + 24]  // ebx = ystep
 | 
| +    add       edx, ebx  // Y pointer += ystep
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]  // mm1 = Y table entry for the first pixel
 | 
| +    movzx     eax, byte ptr [edx]  // eax = second Y byte
 | 
| +    add       edx, ebx  // Y pointer += ystep
 | 
| +    movq      mm2, [coefficients_RGB_Y + 8 * eax]  // mm2 = Y table entry for the second pixel
 | 
| +    paddsw    mm1, mm0  // first pixel: Y term + shared U/V term
 | 
| +    paddsw    mm2, mm0  // second pixel: Y term + shared U/V term
 | 
| +    psraw     mm1, 6  // arithmetic shift of each 16-bit lane right by 6
 | 
| +    psraw     mm2, 6
 | 
| +    packuswb  mm1, mm2  // saturate lanes to unsigned bytes: first pixel in low dword, second in high dword
 | 
| +    movntq    [ebp], mm1  // non-temporal 8-byte store of both pixels
 | 
| +    add       ebp, 8  // advance output by two 4-byte pixels
 | 
| + wend :
 | 
| +    sub       ecx, 2  // consume two pixels from the remaining count
 | 
| +    jns       wloop  // keep looping while the count has not gone negative
 | 
| +
 | 
| +    and       ecx, 1  // odd number of pixels? (for width >= 0, ecx is -1 or -2 here; bit 0 set means one left)
 | 
| +    jz        wdone  // even count: nothing left to convert
 | 
| +
 | 
| +    movzx     eax, byte ptr [edi]  // tail: convert the single remaining pixel
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]  // mm0 = U table entry
 | 
| +    movzx     eax, byte ptr [esi]  // eax = V byte
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]  // mm0 += V table entry
 | 
| +    movzx     eax, byte ptr [edx]  // eax = Y byte
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]  // mm1 = Y table entry
 | 
| +    paddsw    mm1, mm0  // Y term + U/V term
 | 
| +    psraw     mm1, 6  // same right shift by 6 as in the main loop
 | 
| +    packuswb  mm1, mm1  // saturate to unsigned bytes; the pixel ends up in the low dword
 | 
| +    movd      [ebp], mm1  // store only the low 4 bytes (one pixel)
 | 
| + wdone :
 | 
| +
 | 
| +    popad  // restore the registers saved by pushad
 | 
| +    ret  // NOTE(review): no emms is executed in this function; presumably the caller issues it - confirm
 | 
| +  }
 | 
| +}
 | 
| +
 | 
| +__declspec(naked)
 | 
| +void DoubleYUVToRGB32Row(const uint8* y_buf,
 | 
|                           const uint8* u_buf,
 | 
|                           const uint8* v_buf,
 | 
|                           uint8* rgb_buf,
 | 
| -                         int width,
 | 
| -                         int dx) {
 | 
| +                         int width) {
 | 
|    __asm {
 | 
|      pushad
 | 
|      mov       edx, [esp + 32 + 4]   // Y
 | 
| @@ -342,63 +440,140 @@
 | 
|      mov       esi, [esp + 32 + 12]  // V
 | 
|      mov       ebp, [esp + 32 + 16]  // rgb
 | 
|      mov       ecx, [esp + 32 + 20]  // width
 | 
| -    xor       eax, eax              // x
 | 
| +    jmp       wend
 | 
|  
 | 
|   wloop :
 | 
| -    mov       ebx, eax
 | 
| -    sar       ebx, 5
 | 
| -    movzx     ebx, byte ptr [edi + ebx]
 | 
| -    movq      mm0, [coefficients_RGB_U + 8 * ebx]
 | 
| -    mov       ebx, eax
 | 
| -    sar       ebx, 5
 | 
| -    movzx     ebx, byte ptr [esi + ebx]
 | 
| +    movzx     eax, byte ptr [edi]
 | 
| +    add       edi, 1
 | 
| +    movzx     ebx, byte ptr [esi]
 | 
| +    add       esi, 1
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| +    movzx     eax, byte ptr [edx]
 | 
|      paddsw    mm0, [coefficients_RGB_V + 8 * ebx]
 | 
| -    mov       ebx, eax
 | 
| -    sar       ebx, 4
 | 
| -    movzx     ebx, byte ptr [edx + ebx]
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
| +    paddsw    mm1, mm0
 | 
| +    psraw     mm1, 6
 | 
| +    packuswb  mm1, mm1
 | 
| +    punpckldq mm1, mm1
 | 
| +    movntq    [ebp], mm1
 | 
| +
 | 
| +    movzx     ebx, byte ptr [edx + 1]
 | 
| +    add       edx, 2
 | 
|      paddsw    mm0, [coefficients_RGB_Y + 8 * ebx]
 | 
|      psraw     mm0, 6
 | 
|      packuswb  mm0, mm0
 | 
| -    movd      [ebp], mm0
 | 
| +    punpckldq mm0, mm0
 | 
| +    movntq    [ebp+8], mm0
 | 
| +    add       ebp, 16
 | 
| + wend :
 | 
| +    sub       ecx, 4
 | 
| +    jns       wloop
 | 
| +
 | 
| +    add       ecx, 4
 | 
| +    jz        wdone
 | 
| +
 | 
| +    movzx     eax, byte ptr [edi]
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| +    movzx     eax, byte ptr [esi]
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]
 | 
| +    movzx     eax, byte ptr [edx]
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
| +    paddsw    mm1, mm0
 | 
| +    psraw     mm1, 6
 | 
| +    packuswb  mm1, mm1
 | 
| +    jmp       wend1
 | 
| +
 | 
| + wloop1 :
 | 
| +    movd      [ebp], mm1
 | 
|      add       ebp, 4
 | 
| -    add       eax, [esp + 32 + 24]  // x += dx
 | 
| + wend1 :
 | 
|      sub       ecx, 1
 | 
| -    jnz       wloop
 | 
| -
 | 
| +    jns       wloop1
 | 
| + wdone :
 | 
|      popad
 | 
|      ret
 | 
|    }
 | 
|  }
 | 
|  
 | 
| -
 | 
| +// This version does general purpose scaling by any amount, up or down.
 | 
| +// The only thing it cannot do is rotation by 90 or 270 degrees.
 | 
| +// For performance the chroma is under sampled, reducing cost of a 3x
 | 
| +// 1080p scale from 8.4 ms to 5.4 ms.
 | 
|  __declspec(naked)
 | 
| -void Half2Row(const uint8* in_row0,
 | 
| -              const uint8* in_row1,
 | 
| -              uint8* out_row,
 | 
| -              int out_width) {
 | 
| +void ScaleYUVToRGB32Row(const uint8* y_buf,
 | 
| +                        const uint8* u_buf,
 | 
| +                        const uint8* v_buf,
 | 
| +                        uint8* rgb_buf,
 | 
| +                        int width,
 | 
| +                        int dx) {
 | 
|    __asm {
 | 
|      pushad
 | 
| -    mov       esi, [esp + 32 + 4]   // row0
 | 
| -    mov       ebx, [esp + 32 + 8]   // row1
 | 
| -    mov       edi, [esp + 32 + 12]  // out
 | 
| -    mov       ecx, [esp + 32 + 16]  // width
 | 
| +    mov       edx, [esp + 32 + 4]   // Y
 | 
| +    mov       edi, [esp + 32 + 8]   // U
 | 
| +    mov       esi, [esp + 32 + 12]  // V
 | 
| +    mov       ebp, [esp + 32 + 16]  // rgb
 | 
| +    mov       ecx, [esp + 32 + 20]  // width
 | 
| +    xor       ebx, ebx              // x
 | 
| +    jmp       wend
 | 
|  
 | 
|   wloop :
 | 
| -    movzx     eax, byte ptr [esi]
 | 
| -    movzx     edx, byte ptr [esi+1]
 | 
| -    add       esi, 2
 | 
| -    add       eax, edx
 | 
| -    movzx     edx, byte ptr [ebx]
 | 
| -    add       eax, edx
 | 
| -    movzx     edx, byte ptr [ebx+1]
 | 
| -    add       eax, edx
 | 
| -    add       ebx, 2
 | 
| -    shr       eax, 2
 | 
| -    mov       [edi], al
 | 
| -    add       edi, 1
 | 
| -    sub       ecx, 1
 | 
| -    jnz       wloop
 | 
| +    mov       eax, ebx
 | 
| +    sar       eax, 5
 | 
| +    movzx     eax, byte ptr [edi + eax]
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| +    mov       eax, ebx
 | 
| +    sar       eax, 5
 | 
| +    movzx     eax, byte ptr [esi + eax]
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]
 | 
| +    mov       eax, ebx
 | 
| +    add       ebx, [esp + 32 + 24]  // x += dx
 | 
| +    sar       eax, 4
 | 
| +    movzx     eax, byte ptr [edx + eax]
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
| +    mov       eax, ebx
 | 
| +    add       ebx, [esp + 32 + 24]  // x += dx
 | 
| +    sar       eax, 4
 | 
| +    movzx     eax, byte ptr [edx + eax]
 | 
| +    movq      mm2, [coefficients_RGB_Y + 8 * eax]
 | 
| +    paddsw    mm1, mm0
 | 
| +    paddsw    mm2, mm0
 | 
| +    psraw     mm1, 6
 | 
| +    psraw     mm2, 6
 | 
| +    packuswb  mm1, mm2
 | 
| +    movntq    [ebp], mm1
 | 
| +    add       ebp, 8
 | 
| + wend :
 | 
| +    sub       ecx, 2
 | 
| +    jns       wloop
 | 
|  
 | 
| +    and       ecx, 1  // odd number of pixels?
 | 
| +    jz        wdone
 | 
| +
 | 
| +    mov       eax, ebx
 | 
| +    sar       eax, 5
 | 
| +    movzx     eax, byte ptr [edi + eax]
 | 
| +    movq      mm0, [coefficients_RGB_U + 8 * eax]
 | 
| +    mov       eax, ebx
 | 
| +    sar       eax, 5
 | 
| +    movzx     eax, byte ptr [esi + eax]
 | 
| +    paddsw    mm0, [coefficients_RGB_V + 8 * eax]
 | 
| +    mov       eax, ebx
 | 
| +    sar       eax, 4
 | 
| +    movzx     eax, byte ptr [edx + eax]
 | 
| +    movq      mm1, [coefficients_RGB_Y + 8 * eax]
 | 
| +    mov       eax, ebx
 | 
| +    sar       eax, 4
 | 
| +    movzx     eax, byte ptr [edx + eax]
 | 
| +    movq      mm2, [coefficients_RGB_Y + 8 * eax]
 | 
| +    paddsw    mm1, mm0
 | 
| +    paddsw    mm2, mm0
 | 
| +    psraw     mm1, 6
 | 
| +    psraw     mm2, 6
 | 
| +    packuswb  mm1, mm2
 | 
| +    movd      [ebp], mm1
 | 
| +
 | 
| + wdone :
 | 
| +
 | 
|      popad
 | 
|      ret
 | 
|    }
 | 
| 
 |