OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
42 punpcklwd %1,xmm0 | 42 punpcklwd %1,xmm0 |
43 punpckhwd xmm1,xmm0 | 43 punpckhwd xmm1,xmm0 |
44 paddd %1, xmm1 | 44 paddd %1, xmm1 |
45 SUM_ACROSS_Q %1 | 45 SUM_ACROSS_Q %1 |
46 %endmacro | 46 %endmacro |
47 ;void ssim_parms_sse2( | 47 ;void vpx_ssim_parms_16x16_sse2( |
48 ; unsigned char *s, | 48 ; unsigned char *s, |
49 ; int sp, | 49 ; int sp, |
50 ; unsigned char *r, | 50 ; unsigned char *r, |
51 ; int rp | 51 ; int rp, |
52 ; unsigned long *sum_s, | 52 ; uint32_t *sum_s, |
53 ; unsigned long *sum_r, | 53 ; uint32_t *sum_r, |
54 ; unsigned long *sum_sq_s, | 54 ; uint32_t *sum_sq_s, |
55 ; unsigned long *sum_sq_r, | 55 ; uint32_t *sum_sq_r, |
56 ; unsigned long *sum_sxr); | 56 ; uint32_t *sum_sxr); |
57 ; | 57 ; |
58 ; TODO: Use parm passing through structure, probably don't need the pxors | 58 ; TODO: Use parm passing through structure, probably don't need the pxors |
59 ; ( calling app will initialize to 0 ) could easily fit everything in sse2 | 59 ; ( calling app will initialize to 0 ) could easily fit everything in sse2 |
60 ; without too much hastle, and can probably do better estimates with psadw | 60 ; without too much hassle, and can probably do better estimates with psadbw |
61 ; or pavgb At this point this is just meant to be first pass for calculating | 61 ; or pavgb. At this point this is just meant to be first pass for calculating |
62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion | 62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion |
63 ; in mode selection code. | 63 ; in mode selection code. |
64 global sym(vp8_ssim_parms_16x16_sse2) PRIVATE | 64 global sym(vpx_ssim_parms_16x16_sse2) PRIVATE |
65 sym(vp8_ssim_parms_16x16_sse2): | 65 sym(vpx_ssim_parms_16x16_sse2): |
66 push rbp | 66 push rbp |
67 mov rbp, rsp | 67 mov rbp, rsp |
68 SHADOW_ARGS_TO_STACK 9 | 68 SHADOW_ARGS_TO_STACK 9 |
69 SAVE_XMM 15 | 69 SAVE_XMM 15 |
70 push rsi | 70 push rsi |
71 push rdi | 71 push rdi |
72 ; end prolog | 72 ; end prolog |
73 | 73 |
74 mov rsi, arg(0) ;s | 74 mov rsi, arg(0) ;s |
75 mov rcx, arg(1) ;sp | 75 mov rcx, arg(1) ;sp |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
132 RESTORE_XMM | 132 RESTORE_XMM |
133 UNSHADOW_ARGS | 133 UNSHADOW_ARGS |
134 pop rbp | 134 pop rbp |
135 ret | 135 ret |
136 | 136 |
137 ;void ssim_parms_sse2( | 137 ;void vpx_ssim_parms_8x8_sse2( |
138 ; unsigned char *s, | 138 ; unsigned char *s, |
139 ; int sp, | 139 ; int sp, |
140 ; unsigned char *r, | 140 ; unsigned char *r, |
141 ; int rp | 141 ; int rp, |
142 ; unsigned long *sum_s, | 142 ; uint32_t *sum_s, |
143 ; unsigned long *sum_r, | 143 ; uint32_t *sum_r, |
144 ; unsigned long *sum_sq_s, | 144 ; uint32_t *sum_sq_s, |
145 ; unsigned long *sum_sq_r, | 145 ; uint32_t *sum_sq_r, |
146 ; unsigned long *sum_sxr); | 146 ; uint32_t *sum_sxr); |
147 ; | 147 ; |
148 ; TODO: Use parm passing through structure, probably don't need the pxors | 148 ; TODO: Use parm passing through structure, probably don't need the pxors |
149 ; ( calling app will initialize to 0 ) could easily fit everything in sse2 | 149 ; ( calling app will initialize to 0 ) could easily fit everything in sse2 |
150 ; without too much hastle, and can probably do better estimates with psadw | 150 ; without too much hassle, and can probably do better estimates with psadbw |
151 ; or pavgb At this point this is just meant to be first pass for calculating | 151 ; or pavgb. At this point this is just meant to be first pass for calculating |
152 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion | 152 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion |
153 ; in mode selection code. | 153 ; in mode selection code. |
154 global sym(vp8_ssim_parms_8x8_sse2) PRIVATE | 154 global sym(vpx_ssim_parms_8x8_sse2) PRIVATE |
155 sym(vp8_ssim_parms_8x8_sse2): | 155 sym(vpx_ssim_parms_8x8_sse2): |
156 push rbp | 156 push rbp |
157 mov rbp, rsp | 157 mov rbp, rsp |
158 SHADOW_ARGS_TO_STACK 9 | 158 SHADOW_ARGS_TO_STACK 9 |
159 SAVE_XMM 15 | 159 SAVE_XMM 15 |
160 push rsi | 160 push rsi |
161 push rdi | 161 push rdi |
162 ; end prolog | 162 ; end prolog |
163 | 163 |
164 mov rsi, arg(0) ;s | 164 mov rsi, arg(0) ;s |
165 mov rcx, arg(1) ;sp | 165 mov rcx, arg(1) ;sp |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
207 mov rdi,arg(8) | 207 mov rdi,arg(8) |
208 movd [rdi], xmm11; | 208 movd [rdi], xmm11; |
209 | 209 |
210 ; begin epilog | 210 ; begin epilog |
211 pop rdi | 211 pop rdi |
212 pop rsi | 212 pop rsi |
213 RESTORE_XMM | 213 RESTORE_XMM |
214 UNSHADOW_ARGS | 214 UNSHADOW_ARGS |
215 pop rbp | 215 pop rbp |
216 ret | 216 ret |
OLD | NEW |