Index: source/libvpx/vp9/common/x86/vp9_subpixel_mmx.asm |
=================================================================== |
--- source/libvpx/vp9/common/x86/vp9_subpixel_mmx.asm (revision 177019) |
+++ source/libvpx/vp9/common/x86/vp9_subpixel_mmx.asm (working copy) |
@@ -202,438 +202,6 @@ |
pop rbp |
ret |
- |
-;void bilinear_predict8x8_mmx |
-;( |
-; unsigned char *src_ptr, |
-; int src_pixels_per_line, |
-; int xoffset, |
-; int yoffset, |
-; unsigned char *dst_ptr, |
-; int dst_pitch |
-;) |
-global sym(vp9_bilinear_predict8x8_mmx) PRIVATE |
-sym(vp9_bilinear_predict8x8_mmx): |
- push rbp |
- mov rbp, rsp |
- SHADOW_ARGS_TO_STACK 6 |
- GET_GOT rbx |
- push rsi |
- push rdi |
- ; end prolog |
- |
- ;const short *HFilter = bilinear_filters_mmx[xoffset]; |
- ;const short *VFilter = bilinear_filters_mmx[yoffset]; |
- |
- movsxd rax, dword ptr arg(2) ;xoffset |
- mov rdi, arg(4) ;dst_ptr ; |
- |
- shl rax, 5 ; offset * 32 |
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] |
- |
- add rax, rcx ; HFilter |
- mov rsi, arg(0) ;src_ptr ; |
- |
- movsxd rdx, dword ptr arg(5) ;dst_pitch |
- movq mm1, [rax] ; |
- |
- movq mm2, [rax+16] ; |
- movsxd rax, dword ptr arg(3) ;yoffset |
- |
- pxor mm0, mm0 ; |
- |
- shl rax, 5 ; offset*32 |
- add rax, rcx ; VFilter |
- |
- lea rcx, [rdi+rdx*8] ; |
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; |
- |
- |
- |
- ; get the first horizontal line done ; |
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 |
- movq mm4, mm3 ; make a copy of current line |
- |
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 |
- punpckhbw mm4, mm0 ; |
- |
- pmullw mm3, mm1 ; |
- pmullw mm4, mm1 ; |
- |
- movq mm5, [rsi+1] ; |
- movq mm6, mm5 ; |
- |
- punpcklbw mm5, mm0 ; |
- punpckhbw mm6, mm0 ; |
- |
- pmullw mm5, mm2 ; |
- pmullw mm6, mm2 ; |
- |
- paddw mm3, mm5 ; |
- paddw mm4, mm6 ; |
- |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- paddw mm4, [GLOBAL(rd)] ; |
- psraw mm4, VP9_FILTER_SHIFT ; |
- |
- movq mm7, mm3 ; |
- packuswb mm7, mm4 ; |
- |
- add rsi, rdx ; next line |
-.next_row_8x8: |
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 |
- movq mm4, mm3 ; make a copy of current line |
- |
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 |
- punpckhbw mm4, mm0 ; |
- |
- pmullw mm3, mm1 ; |
- pmullw mm4, mm1 ; |
- |
- movq mm5, [rsi+1] ; |
- movq mm6, mm5 ; |
- |
- punpcklbw mm5, mm0 ; |
- punpckhbw mm6, mm0 ; |
- |
- pmullw mm5, mm2 ; |
- pmullw mm6, mm2 ; |
- |
- paddw mm3, mm5 ; |
- paddw mm4, mm6 ; |
- |
- movq mm5, mm7 ; |
- movq mm6, mm7 ; |
- |
- punpcklbw mm5, mm0 ; |
- punpckhbw mm6, mm0 |
- |
- pmullw mm5, [rax] ; |
- pmullw mm6, [rax] ; |
- |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- paddw mm4, [GLOBAL(rd)] ; |
- psraw mm4, VP9_FILTER_SHIFT ; |
- |
- movq mm7, mm3 ; |
- packuswb mm7, mm4 ; |
- |
- |
- pmullw mm3, [rax+16] ; |
- pmullw mm4, [rax+16] ; |
- |
- paddw mm3, mm5 ; |
- paddw mm4, mm6 ; |
- |
- |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- paddw mm4, [GLOBAL(rd)] ; |
- psraw mm4, VP9_FILTER_SHIFT ; |
- |
- packuswb mm3, mm4 |
- |
- movq [rdi], mm3 ; store the results in the destination |
- |
-%if ABI_IS_32BIT |
- add rsi, rdx ; next line |
- add rdi, dword ptr arg(5) ;dst_pitch ; |
-%else |
- movsxd r8, dword ptr arg(5) ;dst_pitch |
- add rsi, rdx ; next line |
- add rdi, r8 ;dst_pitch |
-%endif |
- cmp rdi, rcx ; |
- jne .next_row_8x8 |
- |
- ; begin epilog |
- pop rdi |
- pop rsi |
- RESTORE_GOT |
- UNSHADOW_ARGS |
- pop rbp |
- ret |
- |
- |
-;void bilinear_predict8x4_mmx |
-;( |
-; unsigned char *src_ptr, |
-; int src_pixels_per_line, |
-; int xoffset, |
-; int yoffset, |
-; unsigned char *dst_ptr, |
-; int dst_pitch |
-;) |
-global sym(vp9_bilinear_predict8x4_mmx) PRIVATE |
-sym(vp9_bilinear_predict8x4_mmx): |
- push rbp |
- mov rbp, rsp |
- SHADOW_ARGS_TO_STACK 6 |
- GET_GOT rbx |
- push rsi |
- push rdi |
- ; end prolog |
- |
- ;const short *HFilter = bilinear_filters_mmx[xoffset]; |
- ;const short *VFilter = bilinear_filters_mmx[yoffset]; |
- |
- movsxd rax, dword ptr arg(2) ;xoffset |
- mov rdi, arg(4) ;dst_ptr ; |
- |
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] |
- shl rax, 5 |
- |
- mov rsi, arg(0) ;src_ptr ; |
- add rax, rcx |
- |
- movsxd rdx, dword ptr arg(5) ;dst_pitch |
- movq mm1, [rax] ; |
- |
- movq mm2, [rax+16] ; |
- movsxd rax, dword ptr arg(3) ;yoffset |
- |
- pxor mm0, mm0 ; |
- shl rax, 5 |
- |
- add rax, rcx |
- lea rcx, [rdi+rdx*4] ; |
- |
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; |
- |
- ; get the first horizontal line done ; |
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 |
- movq mm4, mm3 ; make a copy of current line |
- |
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 |
- punpckhbw mm4, mm0 ; |
- |
- pmullw mm3, mm1 ; |
- pmullw mm4, mm1 ; |
- |
- movq mm5, [rsi+1] ; |
- movq mm6, mm5 ; |
- |
- punpcklbw mm5, mm0 ; |
- punpckhbw mm6, mm0 ; |
- |
- pmullw mm5, mm2 ; |
- pmullw mm6, mm2 ; |
- |
- paddw mm3, mm5 ; |
- paddw mm4, mm6 ; |
- |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- paddw mm4, [GLOBAL(rd)] ; |
- psraw mm4, VP9_FILTER_SHIFT ; |
- |
- movq mm7, mm3 ; |
- packuswb mm7, mm4 ; |
- |
- add rsi, rdx ; next line |
-.next_row_8x4: |
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 |
- movq mm4, mm3 ; make a copy of current line |
- |
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 |
- punpckhbw mm4, mm0 ; |
- |
- pmullw mm3, mm1 ; |
- pmullw mm4, mm1 ; |
- |
- movq mm5, [rsi+1] ; |
- movq mm6, mm5 ; |
- |
- punpcklbw mm5, mm0 ; |
- punpckhbw mm6, mm0 ; |
- |
- pmullw mm5, mm2 ; |
- pmullw mm6, mm2 ; |
- |
- paddw mm3, mm5 ; |
- paddw mm4, mm6 ; |
- |
- movq mm5, mm7 ; |
- movq mm6, mm7 ; |
- |
- punpcklbw mm5, mm0 ; |
- punpckhbw mm6, mm0 |
- |
- pmullw mm5, [rax] ; |
- pmullw mm6, [rax] ; |
- |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- paddw mm4, [GLOBAL(rd)] ; |
- psraw mm4, VP9_FILTER_SHIFT ; |
- |
- movq mm7, mm3 ; |
- packuswb mm7, mm4 ; |
- |
- |
- pmullw mm3, [rax+16] ; |
- pmullw mm4, [rax+16] ; |
- |
- paddw mm3, mm5 ; |
- paddw mm4, mm6 ; |
- |
- |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- paddw mm4, [GLOBAL(rd)] ; |
- psraw mm4, VP9_FILTER_SHIFT ; |
- |
- packuswb mm3, mm4 |
- |
- movq [rdi], mm3 ; store the results in the destination |
- |
-%if ABI_IS_32BIT |
- add rsi, rdx ; next line |
- add rdi, dword ptr arg(5) ;dst_pitch ; |
-%else |
- movsxd r8, dword ptr arg(5) ;dst_pitch |
- add rsi, rdx ; next line |
- add rdi, r8 |
-%endif |
- cmp rdi, rcx ; |
- jne .next_row_8x4 |
- |
- ; begin epilog |
- pop rdi |
- pop rsi |
- RESTORE_GOT |
- UNSHADOW_ARGS |
- pop rbp |
- ret |
- |
- |
-;void bilinear_predict4x4_mmx |
-;( |
-; unsigned char *src_ptr, |
-; int src_pixels_per_line, |
-; int xoffset, |
-; int yoffset, |
-; unsigned char *dst_ptr, |
-; int dst_pitch |
-;) |
-global sym(vp9_bilinear_predict4x4_mmx) PRIVATE |
-sym(vp9_bilinear_predict4x4_mmx): |
- push rbp |
- mov rbp, rsp |
- SHADOW_ARGS_TO_STACK 6 |
- GET_GOT rbx |
- push rsi |
- push rdi |
- ; end prolog |
- |
- ;const short *HFilter = bilinear_filters_mmx[xoffset]; |
- ;const short *VFilter = bilinear_filters_mmx[yoffset]; |
- |
- movsxd rax, dword ptr arg(2) ;xoffset |
- mov rdi, arg(4) ;dst_ptr ; |
- |
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] |
- shl rax, 5 |
- |
- add rax, rcx ; HFilter |
- mov rsi, arg(0) ;src_ptr ; |
- |
- movsxd rdx, dword ptr arg(5) ;ldst_pitch |
- movq mm1, [rax] ; |
- |
- movq mm2, [rax+16] ; |
- movsxd rax, dword ptr arg(3) ;yoffset |
- |
- pxor mm0, mm0 ; |
- shl rax, 5 |
- |
- add rax, rcx |
- lea rcx, [rdi+rdx*4] ; |
- |
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; |
- |
- ; get the first horizontal line done ; |
- movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 |
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 |
- |
- pmullw mm3, mm1 ; |
- movd mm5, [rsi+1] ; |
- |
- punpcklbw mm5, mm0 ; |
- pmullw mm5, mm2 ; |
- |
- paddw mm3, mm5 ; |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- movq mm7, mm3 ; |
- packuswb mm7, mm0 ; |
- |
- add rsi, rdx ; next line |
-.next_row_4x4: |
- movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 |
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 |
- |
- pmullw mm3, mm1 ; |
- movd mm5, [rsi+1] ; |
- |
- punpcklbw mm5, mm0 ; |
- pmullw mm5, mm2 ; |
- |
- paddw mm3, mm5 ; |
- |
- movq mm5, mm7 ; |
- punpcklbw mm5, mm0 ; |
- |
- pmullw mm5, [rax] ; |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- movq mm7, mm3 ; |
- |
- packuswb mm7, mm0 ; |
- |
- pmullw mm3, [rax+16] ; |
- paddw mm3, mm5 ; |
- |
- |
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value |
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 |
- |
- packuswb mm3, mm0 |
- movd [rdi], mm3 ; store the results in the destination |
- |
-%if ABI_IS_32BIT |
- add rsi, rdx ; next line |
- add rdi, dword ptr arg(5) ;dst_pitch ; |
-%else |
- movsxd r8, dword ptr arg(5) ;dst_pitch ; |
- add rsi, rdx ; next line |
- add rdi, r8 |
-%endif |
- |
- cmp rdi, rcx ; |
- jne .next_row_4x4 |
- |
- ; begin epilog |
- pop rdi |
- pop rsi |
- RESTORE_GOT |
- UNSHADOW_ARGS |
- pop rbp |
- ret |
- |
- |
- |
SECTION_RODATA |
align 16 |
rd: |
@@ -698,30 +266,3 @@ |
times 8 dw -6 |
times 8 dw 0 |
- |
-align 16 |
-global HIDDEN_DATA(sym(vp9_bilinear_filters_8x_mmx)) |
-sym(vp9_bilinear_filters_8x_mmx): |
- times 8 dw 128 |
- times 8 dw 0 |
- |
- times 8 dw 112 |
- times 8 dw 16 |
- |
- times 8 dw 96 |
- times 8 dw 32 |
- |
- times 8 dw 80 |
- times 8 dw 48 |
- |
- times 8 dw 64 |
- times 8 dw 64 |
- |
- times 8 dw 48 |
- times 8 dw 80 |
- |
- times 8 dw 32 |
- times 8 dw 96 |
- |
- times 8 dw 16 |
- times 8 dw 112 |