Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(29)

Unified Diff: source/libvpx/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm

Issue 341293003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm
===================================================================
--- source/libvpx/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm (revision 278778)
+++ source/libvpx/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm (working copy)
@@ -15,41 +15,45 @@
; (unsigned char *frame1, | 0
; unsigned int stride, | 1
; unsigned char *frame2, | 2
-; unsigned int block_size, | 3
-; int strength, | 4
-; int filter_weight, | 5
-; unsigned int *accumulator, | 6
-; unsigned short *count) | 7
+; unsigned int block_width, | 3
+; unsigned int block_height, | 4
+; int strength, | 5
+; int filter_weight, | 6
+; unsigned int *accumulator, | 7
+; unsigned short *count) | 8
global sym(vp9_temporal_filter_apply_sse2) PRIVATE
sym(vp9_temporal_filter_apply_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
+ SHADOW_ARGS_TO_STACK 9
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
ALIGN_STACK 16, rax
- %define block_size 0
- %define strength 16
- %define filter_weight 32
- %define rounding_bit 48
- %define rbp_backup 64
- %define stack_size 80
+ %define block_width 0
+ %define block_height 16
+ %define strength 32
+ %define filter_weight 48
+ %define rounding_bit 64
+ %define rbp_backup 80
+ %define stack_size 96
sub rsp, stack_size
mov [rsp + rbp_backup], rbp
; end prolog
- mov rdx, arg(3)
- mov [rsp + block_size], rdx
- movd xmm6, arg(4)
+ mov edx, arg(3)
+ mov [rsp + block_width], rdx
+ mov edx, arg(4)
+ mov [rsp + block_height], rdx
+ movd xmm6, arg(5)
movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
; calculate the rounding bit outside the loop
; 0x8000 >> (16 - strength)
mov rdx, 16
- sub rdx, arg(4) ; 16 - strength
+ sub rdx, arg(5) ; 16 - strength
movq xmm4, rdx ; can't use rdx w/ shift
movdqa xmm5, [GLOBAL(_const_top_bit)]
psrlw xmm5, xmm4
@@ -57,11 +61,11 @@
mov rsi, arg(0) ; src/frame1
mov rdx, arg(2) ; predictor frame
- mov rdi, arg(6) ; accumulator
- mov rax, arg(7) ; count
+ mov rdi, arg(7) ; accumulator
+ mov rax, arg(8) ; count
; dup the filter weight and store for later
- movd xmm0, arg(5) ; filter_weight
+ movd xmm0, arg(6) ; filter_weight
pshuflw xmm0, xmm0, 0
punpcklwd xmm0, xmm0
movdqa [rsp + filter_weight], xmm0
@@ -69,10 +73,11 @@
mov rbp, arg(1) ; stride
pxor xmm7, xmm7 ; zero for extraction
- lea rcx, [rdx + 16*16*1]
- cmp dword ptr [rsp + block_size], 8
+ mov rcx, [rsp + block_width]
+ imul rcx, [rsp + block_height]
+ add rcx, rdx
+ cmp dword ptr [rsp + block_width], 8
jne .temporal_filter_apply_load_16
- lea rcx, [rdx + 8*8*1]
.temporal_filter_apply_load_8:
movq xmm0, [rsi] ; first row
@@ -178,7 +183,7 @@
cmp rdx, rcx
je .temporal_filter_apply_epilog
pxor xmm7, xmm7 ; zero for extraction
- cmp dword ptr [rsp + block_size], 16
+ cmp dword ptr [rsp + block_width], 16
je .temporal_filter_apply_load_16
jmp .temporal_filter_apply_load_8
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm ('k') | source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698