| Index: source/libvpx/vp9/common/x86/vp9_subpixel_mmx.asm
|
| ===================================================================
|
| --- source/libvpx/vp9/common/x86/vp9_subpixel_mmx.asm (revision 177019)
|
| +++ source/libvpx/vp9/common/x86/vp9_subpixel_mmx.asm (working copy)
|
| @@ -202,438 +202,6 @@
|
| pop rbp
|
| ret
|
|
|
| -
|
| -;void bilinear_predict8x8_mmx
|
| -;(
|
| -; unsigned char *src_ptr,
|
| -; int src_pixels_per_line,
|
| -; int xoffset,
|
| -; int yoffset,
|
| -; unsigned char *dst_ptr,
|
| -; int dst_pitch
|
| -;)
|
| -global sym(vp9_bilinear_predict8x8_mmx) PRIVATE
|
| -sym(vp9_bilinear_predict8x8_mmx):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 6
|
| - GET_GOT rbx
|
| - push rsi
|
| - push rdi
|
| - ; end prolog
|
| -
|
| - ;const short *HFilter = bilinear_filters_mmx[xoffset];
|
| - ;const short *VFilter = bilinear_filters_mmx[yoffset];
|
| -
|
| - movsxd rax, dword ptr arg(2) ;xoffset
|
| - mov rdi, arg(4) ;dst_ptr ;
|
| -
|
| - shl rax, 5 ; offset * 32
|
| - lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
|
| -
|
| - add rax, rcx ; HFilter
|
| - mov rsi, arg(0) ;src_ptr ;
|
| -
|
| - movsxd rdx, dword ptr arg(5) ;dst_pitch
|
| - movq mm1, [rax] ;
|
| -
|
| - movq mm2, [rax+16] ;
|
| - movsxd rax, dword ptr arg(3) ;yoffset
|
| -
|
| - pxor mm0, mm0 ;
|
| -
|
| - shl rax, 5 ; offset*32
|
| - add rax, rcx ; VFilter
|
| -
|
| - lea rcx, [rdi+rdx*8] ;
|
| - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
|
| -
|
| -
|
| -
|
| - ; get the first horizontal line done ;
|
| - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
| - movq mm4, mm3 ; make a copy of current line
|
| -
|
| - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
| - punpckhbw mm4, mm0 ;
|
| -
|
| - pmullw mm3, mm1 ;
|
| - pmullw mm4, mm1 ;
|
| -
|
| - movq mm5, [rsi+1] ;
|
| - movq mm6, mm5 ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - punpckhbw mm6, mm0 ;
|
| -
|
| - pmullw mm5, mm2 ;
|
| - pmullw mm6, mm2 ;
|
| -
|
| - paddw mm3, mm5 ;
|
| - paddw mm4, mm6 ;
|
| -
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - paddw mm4, [GLOBAL(rd)] ;
|
| - psraw mm4, VP9_FILTER_SHIFT ;
|
| -
|
| - movq mm7, mm3 ;
|
| - packuswb mm7, mm4 ;
|
| -
|
| - add rsi, rdx ; next line
|
| -.next_row_8x8:
|
| - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
| - movq mm4, mm3 ; make a copy of current line
|
| -
|
| - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
| - punpckhbw mm4, mm0 ;
|
| -
|
| - pmullw mm3, mm1 ;
|
| - pmullw mm4, mm1 ;
|
| -
|
| - movq mm5, [rsi+1] ;
|
| - movq mm6, mm5 ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - punpckhbw mm6, mm0 ;
|
| -
|
| - pmullw mm5, mm2 ;
|
| - pmullw mm6, mm2 ;
|
| -
|
| - paddw mm3, mm5 ;
|
| - paddw mm4, mm6 ;
|
| -
|
| - movq mm5, mm7 ;
|
| - movq mm6, mm7 ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - punpckhbw mm6, mm0
|
| -
|
| - pmullw mm5, [rax] ;
|
| - pmullw mm6, [rax] ;
|
| -
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - paddw mm4, [GLOBAL(rd)] ;
|
| - psraw mm4, VP9_FILTER_SHIFT ;
|
| -
|
| - movq mm7, mm3 ;
|
| - packuswb mm7, mm4 ;
|
| -
|
| -
|
| - pmullw mm3, [rax+16] ;
|
| - pmullw mm4, [rax+16] ;
|
| -
|
| - paddw mm3, mm5 ;
|
| - paddw mm4, mm6 ;
|
| -
|
| -
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - paddw mm4, [GLOBAL(rd)] ;
|
| - psraw mm4, VP9_FILTER_SHIFT ;
|
| -
|
| - packuswb mm3, mm4
|
| -
|
| - movq [rdi], mm3 ; store the results in the destination
|
| -
|
| -%if ABI_IS_32BIT
|
| - add rsi, rdx ; next line
|
| - add rdi, dword ptr arg(5) ;dst_pitch ;
|
| -%else
|
| - movsxd r8, dword ptr arg(5) ;dst_pitch
|
| - add rsi, rdx ; next line
|
| - add rdi, r8 ;dst_pitch
|
| -%endif
|
| - cmp rdi, rcx ;
|
| - jne .next_row_8x8
|
| -
|
| - ; begin epilog
|
| - pop rdi
|
| - pop rsi
|
| - RESTORE_GOT
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -
|
| -;void bilinear_predict8x4_mmx
|
| -;(
|
| -; unsigned char *src_ptr,
|
| -; int src_pixels_per_line,
|
| -; int xoffset,
|
| -; int yoffset,
|
| -; unsigned char *dst_ptr,
|
| -; int dst_pitch
|
| -;)
|
| -global sym(vp9_bilinear_predict8x4_mmx) PRIVATE
|
| -sym(vp9_bilinear_predict8x4_mmx):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 6
|
| - GET_GOT rbx
|
| - push rsi
|
| - push rdi
|
| - ; end prolog
|
| -
|
| - ;const short *HFilter = bilinear_filters_mmx[xoffset];
|
| - ;const short *VFilter = bilinear_filters_mmx[yoffset];
|
| -
|
| - movsxd rax, dword ptr arg(2) ;xoffset
|
| - mov rdi, arg(4) ;dst_ptr ;
|
| -
|
| - lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
|
| - shl rax, 5
|
| -
|
| - mov rsi, arg(0) ;src_ptr ;
|
| - add rax, rcx
|
| -
|
| - movsxd rdx, dword ptr arg(5) ;dst_pitch
|
| - movq mm1, [rax] ;
|
| -
|
| - movq mm2, [rax+16] ;
|
| - movsxd rax, dword ptr arg(3) ;yoffset
|
| -
|
| - pxor mm0, mm0 ;
|
| - shl rax, 5
|
| -
|
| - add rax, rcx
|
| - lea rcx, [rdi+rdx*4] ;
|
| -
|
| - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
|
| -
|
| - ; get the first horizontal line done ;
|
| - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
| - movq mm4, mm3 ; make a copy of current line
|
| -
|
| - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
| - punpckhbw mm4, mm0 ;
|
| -
|
| - pmullw mm3, mm1 ;
|
| - pmullw mm4, mm1 ;
|
| -
|
| - movq mm5, [rsi+1] ;
|
| - movq mm6, mm5 ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - punpckhbw mm6, mm0 ;
|
| -
|
| - pmullw mm5, mm2 ;
|
| - pmullw mm6, mm2 ;
|
| -
|
| - paddw mm3, mm5 ;
|
| - paddw mm4, mm6 ;
|
| -
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - paddw mm4, [GLOBAL(rd)] ;
|
| - psraw mm4, VP9_FILTER_SHIFT ;
|
| -
|
| - movq mm7, mm3 ;
|
| - packuswb mm7, mm4 ;
|
| -
|
| - add rsi, rdx ; next line
|
| -.next_row_8x4:
|
| - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
| - movq mm4, mm3 ; make a copy of current line
|
| -
|
| - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
| - punpckhbw mm4, mm0 ;
|
| -
|
| - pmullw mm3, mm1 ;
|
| - pmullw mm4, mm1 ;
|
| -
|
| - movq mm5, [rsi+1] ;
|
| - movq mm6, mm5 ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - punpckhbw mm6, mm0 ;
|
| -
|
| - pmullw mm5, mm2 ;
|
| - pmullw mm6, mm2 ;
|
| -
|
| - paddw mm3, mm5 ;
|
| - paddw mm4, mm6 ;
|
| -
|
| - movq mm5, mm7 ;
|
| - movq mm6, mm7 ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - punpckhbw mm6, mm0
|
| -
|
| - pmullw mm5, [rax] ;
|
| - pmullw mm6, [rax] ;
|
| -
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - paddw mm4, [GLOBAL(rd)] ;
|
| - psraw mm4, VP9_FILTER_SHIFT ;
|
| -
|
| - movq mm7, mm3 ;
|
| - packuswb mm7, mm4 ;
|
| -
|
| -
|
| - pmullw mm3, [rax+16] ;
|
| - pmullw mm4, [rax+16] ;
|
| -
|
| - paddw mm3, mm5 ;
|
| - paddw mm4, mm6 ;
|
| -
|
| -
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - paddw mm4, [GLOBAL(rd)] ;
|
| - psraw mm4, VP9_FILTER_SHIFT ;
|
| -
|
| - packuswb mm3, mm4
|
| -
|
| - movq [rdi], mm3 ; store the results in the destination
|
| -
|
| -%if ABI_IS_32BIT
|
| - add rsi, rdx ; next line
|
| - add rdi, dword ptr arg(5) ;dst_pitch ;
|
| -%else
|
| - movsxd r8, dword ptr arg(5) ;dst_pitch
|
| - add rsi, rdx ; next line
|
| - add rdi, r8
|
| -%endif
|
| - cmp rdi, rcx ;
|
| - jne .next_row_8x4
|
| -
|
| - ; begin epilog
|
| - pop rdi
|
| - pop rsi
|
| - RESTORE_GOT
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -
|
| -;void bilinear_predict4x4_mmx
|
| -;(
|
| -; unsigned char *src_ptr,
|
| -; int src_pixels_per_line,
|
| -; int xoffset,
|
| -; int yoffset,
|
| -; unsigned char *dst_ptr,
|
| -; int dst_pitch
|
| -;)
|
| -global sym(vp9_bilinear_predict4x4_mmx) PRIVATE
|
| -sym(vp9_bilinear_predict4x4_mmx):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 6
|
| - GET_GOT rbx
|
| - push rsi
|
| - push rdi
|
| - ; end prolog
|
| -
|
| - ;const short *HFilter = bilinear_filters_mmx[xoffset];
|
| - ;const short *VFilter = bilinear_filters_mmx[yoffset];
|
| -
|
| - movsxd rax, dword ptr arg(2) ;xoffset
|
| - mov rdi, arg(4) ;dst_ptr ;
|
| -
|
| - lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
|
| - shl rax, 5
|
| -
|
| - add rax, rcx ; HFilter
|
| - mov rsi, arg(0) ;src_ptr ;
|
| -
|
| - movsxd rdx, dword ptr arg(5) ;ldst_pitch
|
| - movq mm1, [rax] ;
|
| -
|
| - movq mm2, [rax+16] ;
|
| - movsxd rax, dword ptr arg(3) ;yoffset
|
| -
|
| - pxor mm0, mm0 ;
|
| - shl rax, 5
|
| -
|
| - add rax, rcx
|
| - lea rcx, [rdi+rdx*4] ;
|
| -
|
| - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
|
| -
|
| - ; get the first horizontal line done ;
|
| - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
| - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
| -
|
| - pmullw mm3, mm1 ;
|
| - movd mm5, [rsi+1] ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - pmullw mm5, mm2 ;
|
| -
|
| - paddw mm3, mm5 ;
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| -
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - movq mm7, mm3 ;
|
| - packuswb mm7, mm0 ;
|
| -
|
| - add rsi, rdx ; next line
|
| -.next_row_4x4:
|
| - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
| - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
| -
|
| - pmullw mm3, mm1 ;
|
| - movd mm5, [rsi+1] ;
|
| -
|
| - punpcklbw mm5, mm0 ;
|
| - pmullw mm5, mm2 ;
|
| -
|
| - paddw mm3, mm5 ;
|
| -
|
| - movq mm5, mm7 ;
|
| - punpcklbw mm5, mm0 ;
|
| -
|
| - pmullw mm5, [rax] ;
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| -
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| - movq mm7, mm3 ;
|
| -
|
| - packuswb mm7, mm0 ;
|
| -
|
| - pmullw mm3, [rax+16] ;
|
| - paddw mm3, mm5 ;
|
| -
|
| -
|
| - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
| - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
|
| -
|
| - packuswb mm3, mm0
|
| - movd [rdi], mm3 ; store the results in the destination
|
| -
|
| -%if ABI_IS_32BIT
|
| - add rsi, rdx ; next line
|
| - add rdi, dword ptr arg(5) ;dst_pitch ;
|
| -%else
|
| - movsxd r8, dword ptr arg(5) ;dst_pitch ;
|
| - add rsi, rdx ; next line
|
| - add rdi, r8
|
| -%endif
|
| -
|
| - cmp rdi, rcx ;
|
| - jne .next_row_4x4
|
| -
|
| - ; begin epilog
|
| - pop rdi
|
| - pop rsi
|
| - RESTORE_GOT
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -
|
| -
|
| SECTION_RODATA
|
| align 16
|
| rd:
|
| @@ -698,30 +266,3 @@
|
| times 8 dw -6
|
| times 8 dw 0
|
|
|
| -
|
| -align 16
|
| -global HIDDEN_DATA(sym(vp9_bilinear_filters_8x_mmx))
|
| -sym(vp9_bilinear_filters_8x_mmx):
|
| - times 8 dw 128
|
| - times 8 dw 0
|
| -
|
| - times 8 dw 112
|
| - times 8 dw 16
|
| -
|
| - times 8 dw 96
|
| - times 8 dw 32
|
| -
|
| - times 8 dw 80
|
| - times 8 dw 48
|
| -
|
| - times 8 dw 64
|
| - times 8 dw 64
|
| -
|
| - times 8 dw 48
|
| - times 8 dw 80
|
| -
|
| - times 8 dw 32
|
| - times 8 dw 96
|
| -
|
| - times 8 dw 16
|
| - times 8 dw 112
|
|
|