OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 %include "vpx_ports/x86_abi_support.asm" | |
12 | |
; TABULATE_SSIM - fold one batch of eight source/reference pixels into the
; five running SSIM accumulators.
;
; In:       xmm3 = eight source pixels as 16-bit words
;           xmm4 = eight reference pixels as 16-bit words
;           (the entry points in this file produce them by interleaving
;           bytes with a zeroed xmm0)
; Updates:  xmm15 += s     word lanes, saturating add       (sum_s)
;           xmm14 += r     word lanes, saturating add       (sum_r)
;           xmm13 += s*s   dword lanes, two products each   (sum_sq_s)
;           xmm12 += r*r   dword lanes, two products each   (sum_sq_r)
;           xmm11 += s*r   dword lanes, two products each   (sum_sxr)
; Clobbers: xmm1, xmm2, xmm3
%macro TABULATE_SSIM 0
    ; take working copies first; xmm3 itself is consumed by the cross product
    movdqa          xmm1,       xmm3
    movdqa          xmm2,       xmm4

    ; plain sums
    paddusw         xmm15,      xmm3 ; sum_s
    paddusw         xmm14,      xmm4 ; sum_r

    ; products; pmaddwd adds each adjacent pair of word products into a dword
    pmaddwd         xmm1,       xmm1 ; s * s
    pmaddwd         xmm2,       xmm2 ; r * r
    pmaddwd         xmm3,       xmm4 ; s * r

    ; accumulate the products
    paddd           xmm13,      xmm1 ; sum_sq_s
    paddd           xmm12,      xmm2 ; sum_sq_r
    paddd           xmm11,      xmm3 ; sum_sxr
%endmacro
26 | |
; SUM_ACROSS_Q - horizontal sum of the four dwords held in %1.
; Each dword is widened to a qword by interleaving with xmm0, the two qword
; pairs are added, and the two remaining qwords are then added together.
; On exit the low qword of %1 holds the total and the high qword is a copy of
; xmm0's high qword.
; Expects xmm0 == 0 (both entry points in this file clear it beforehand).
; Clobbers xmm2.
%macro SUM_ACROSS_Q 1
    ; (d0, d1, d2, d3) -> (d0 + d2, d1 + d3) as qwords
    movdqa          xmm2,       %1
    punpckhdq       xmm2,       xmm0
    punpckldq       %1,         xmm0
    paddq           %1,         xmm2

    ; (q0, q1) -> (q0 + q1, 0)
    movdqa          xmm2,       %1
    punpckhqdq      xmm2,       xmm0
    punpcklqdq      %1,         xmm0
    paddq           %1,         xmm2
%endmacro
38 | |
; SUM_ACROSS_W - horizontal sum of the eight words held in %1.
; The words are widened to dwords by interleaving with xmm0 and added
; pairwise; SUM_ACROSS_Q then reduces the four dwords to a single total in
; the low qword of %1.
; Expects xmm0 == 0 (both entry points in this file clear it beforehand).
; Clobbers xmm1, plus xmm2 through SUM_ACROSS_Q.
%macro SUM_ACROSS_W 1
    ; (w0 .. w7) -> (w0 + w4, w1 + w5, w2 + w6, w3 + w7) as dwords
    movdqa          xmm1,       %1
    punpckhwd       xmm1,       xmm0
    punpcklwd       %1,         xmm0
    paddd           %1,         xmm1

    SUM_ACROSS_Q    %1
%endmacro
;void vp9_ssim_parms_16x16_sse2(
;    unsigned char *s,
;    int sp,
;    unsigned char *r,
;    int rp,
;    unsigned long *sum_s,
;    unsigned long *sum_r,
;    unsigned long *sum_sq_s,
;    unsigned long *sum_sq_r,
;    unsigned long *sum_sxr);
;
; Computes, over a 16x16 block of source pixels `s` (row stride `sp`) and
; reference pixels `r` (row stride `rp`), the five totals needed for SSIM:
;   *sum_s    = sum of s[i]
;   *sum_r    = sum of r[i]
;   *sum_sq_s = sum of s[i] * s[i]
;   *sum_sq_r = sum of r[i] * r[i]
;   *sum_sxr  = sum of s[i] * r[i]
; The outputs are overwritten, not accumulated into.
;
; Register roles inside the loop:
;   rsi = current source row        rcx = source stride (sign-extended)
;   rdi = current reference row     rax = reference stride (sign-extended)
;   rdx = rows remaining            xmm0 = zero
;   xmm15..xmm11 = sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr accumulators
;
; Each 16-bit lane of xmm15/xmm14 receives 32 values of at most 255
; (two TABULATE_SSIM invocations per row, 16 rows), so the saturating adds
; never actually saturate here.
;
; NOTE(review): results are stored with movd, i.e. only the low 32 bits of
; each output are written. Where `unsigned long` is 64 bits wide the upper
; half of every output keeps whatever the caller left there - confirm that
; callers zero-initialise the outputs.
;
; TODO: Use parm passing through structure, probably don't need the pxors
; ( calling app will initialize to 0 ) could easily fit everything in sse2
; without too much hassle, and can probably do better estimates with psadbw
; or pavgb. At this point this is just meant to be a first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
global sym(vp9_ssim_parms_16x16_sse2) PRIVATE
sym(vp9_ssim_parms_16x16_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 9
    SAVE_XMM 15
    push        rsi
    push        rdi
    ; end prolog

    mov             rsi,        arg(0) ;s
    ; sp and rp are C ints: only the low 32 bits of their argument slots are
    ; defined, so sign-extend them instead of loading all 64 bits. This also
    ; makes negative strides step backwards correctly.
    movsxd          rcx,        dword arg(1) ;sp
    mov             rdi,        arg(2) ;r
    movsxd          rax,        dword arg(3) ;rp

    pxor            xmm0,       xmm0   ;zero, used for all widening unpacks
    pxor            xmm15,      xmm15  ;sum_s
    pxor            xmm14,      xmm14  ;sum_r
    pxor            xmm13,      xmm13  ;sum_sq_s
    pxor            xmm12,      xmm12  ;sum_sq_r
    pxor            xmm11,      xmm11  ;sum_sxr

    mov             rdx,        16     ;row counter
.next_row:

    ;grab 16 source and 16 reference pixels (no alignment assumed)
    movdqu          xmm5,       [rsi]
    movdqu          xmm6,       [rdi]

    ;upper eight pixels of the row, widened to words
    movdqa          xmm3,       xmm5
    movdqa          xmm4,       xmm6
    punpckhbw       xmm3,       xmm0 ; high_s
    punpckhbw       xmm4,       xmm0 ; high_r

    TABULATE_SSIM

    ;lower eight pixels of the row, widened to words
    movdqa          xmm3,       xmm5
    movdqa          xmm4,       xmm6
    punpcklbw       xmm3,       xmm0 ; low_s
    punpcklbw       xmm4,       xmm0 ; low_r

    TABULATE_SSIM

    add             rsi,        rcx   ; next s row
    add             rdi,        rax   ; next r row

    dec             rdx               ; counter
    jnz             .next_row

    ;reduce every accumulator to a single total in its low lane
    SUM_ACROSS_W    xmm15
    SUM_ACROSS_W    xmm14
    SUM_ACROSS_Q    xmm13
    SUM_ACROSS_Q    xmm12
    SUM_ACROSS_Q    xmm11

    ;write the low 32 bits of each total through the output pointers
    mov             rdi,        arg(4)
    movd            [rdi],      xmm15
    mov             rdi,        arg(5)
    movd            [rdi],      xmm14
    mov             rdi,        arg(6)
    movd            [rdi],      xmm13
    mov             rdi,        arg(7)
    movd            [rdi],      xmm12
    mov             rdi,        arg(8)
    movd            [rdi],      xmm11

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
136 | |
;void vp9_ssim_parms_8x8_sse2(
;    unsigned char *s,
;    int sp,
;    unsigned char *r,
;    int rp,
;    unsigned long *sum_s,
;    unsigned long *sum_r,
;    unsigned long *sum_sq_s,
;    unsigned long *sum_sq_r,
;    unsigned long *sum_sxr);
;
; Computes, over an 8x8 block of source pixels `s` (row stride `sp`) and
; reference pixels `r` (row stride `rp`), the five totals needed for SSIM:
;   *sum_s    = sum of s[i]
;   *sum_r    = sum of r[i]
;   *sum_sq_s = sum of s[i] * s[i]
;   *sum_sq_r = sum of r[i] * r[i]
;   *sum_sxr  = sum of s[i] * r[i]
; The outputs are overwritten, not accumulated into.
;
; Register roles inside the loop:
;   rsi = current source row        rcx = source stride (sign-extended)
;   rdi = current reference row     rax = reference stride (sign-extended)
;   rdx = rows remaining            xmm0 = zero
;   xmm15..xmm11 = sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr accumulators
;
; NOTE(review): results are stored with movd, i.e. only the low 32 bits of
; each output are written. Where `unsigned long` is 64 bits wide the upper
; half of every output keeps whatever the caller left there - confirm that
; callers zero-initialise the outputs.
;
; TODO: Use parm passing through structure, probably don't need the pxors
; ( calling app will initialize to 0 ) could easily fit everything in sse2
; without too much hassle, and can probably do better estimates with psadbw
; or pavgb. At this point this is just meant to be a first pass for calculating
; all the parms needed for 8x8 ssim so we can play with dssim as distortion
; in mode selection code.
global sym(vp9_ssim_parms_8x8_sse2) PRIVATE
sym(vp9_ssim_parms_8x8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 9
    SAVE_XMM 15
    push        rsi
    push        rdi
    ; end prolog

    mov             rsi,        arg(0) ;s
    ; sp and rp are C ints: only the low 32 bits of their argument slots are
    ; defined, so sign-extend them instead of loading all 64 bits. This also
    ; makes negative strides step backwards correctly.
    movsxd          rcx,        dword arg(1) ;sp
    mov             rdi,        arg(2) ;r
    movsxd          rax,        dword arg(3) ;rp

    pxor            xmm0,       xmm0   ;zero, used for all widening unpacks
    pxor            xmm15,      xmm15  ;sum_s
    pxor            xmm14,      xmm14  ;sum_r
    pxor            xmm13,      xmm13  ;sum_sq_s
    pxor            xmm12,      xmm12  ;sum_sq_r
    pxor            xmm11,      xmm11  ;sum_sxr

    mov             rdx,        8      ;row counter
.next_row:

    ;grab 8 source and 8 reference pixels and widen them to words
    movq            xmm3,       [rsi]
    movq            xmm4,       [rdi]
    punpcklbw       xmm3,       xmm0 ; low_s
    punpcklbw       xmm4,       xmm0 ; low_r

    TABULATE_SSIM

    add             rsi,        rcx   ; next s row
    add             rdi,        rax   ; next r row

    dec             rdx               ; counter
    jnz             .next_row

    ;reduce every accumulator to a single total in its low lane
    SUM_ACROSS_W    xmm15
    SUM_ACROSS_W    xmm14
    SUM_ACROSS_Q    xmm13
    SUM_ACROSS_Q    xmm12
    SUM_ACROSS_Q    xmm11

    ;write the low 32 bits of each total through the output pointers
    mov             rdi,        arg(4)
    movd            [rdi],      xmm15
    mov             rdi,        arg(5)
    movd            [rdi],      xmm14
    mov             rdi,        arg(6)
    movd            [rdi],      xmm13
    mov             rdi,        arg(7)
    movd            [rdi],      xmm12
    mov             rdi,        arg(8)
    movd            [rdi],      xmm11

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
OLD | NEW |