OLD | NEW |
(Empty) | |
| 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; |
| 10 |
| 11 |
| 12 %include "vpx_ports/x86_abi_support.asm" |
| 13 |
;void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
;                              short *diff, unsigned char *Predictor,
;                              int pitch);
;
; Writes a 4x4 block of 16-bit differences: diff = z - Predictor.
;   row r of z         : 4 bytes read from  z + r * src_stride
;   row r of Predictor : 4 bytes read from  Predictor + r * pitch
;   row r of diff      : 4 shorts written to diff + r * pitch (in shorts)
;
; Register roles:
;   rsi = z, rax = Predictor, rdi = diff, rdx = src_stride, rcx = pitch
;   xmm2 = zero (for byte -> word widening), xmm0 / xmm1 = scratch
;
; Only xmm0-xmm2 are touched, so the MMX/x87 register state is left alone
; and no callee-saved XMM register (xmm6+ under the Microsoft x64 ABI) has
; to be preserved. No GOT setup is done because no global data is referenced.
global sym(vp9_subtract_b_sse2_impl)
sym(vp9_subtract_b_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(2)                 ;diff
    mov         rax, arg(3)                 ;Predictor
    mov         rsi, arg(0)                 ;z
    movsxd      rdx, dword ptr arg(1)       ;src_stride
    movsxd      rcx, dword ptr arg(4)       ;pitch
    pxor        xmm2, xmm2                  ; zero: high byte of each widened pixel

    ; row 0
    movd        xmm0, [rsi]
    movd        xmm1, [rax]
    punpcklbw   xmm0, xmm2                  ; 4 source bytes     -> 4 words
    punpcklbw   xmm1, xmm2                  ; 4 prediction bytes -> 4 words
    psubw       xmm0, xmm1
    movq        MMWORD PTR [rdi], xmm0      ; store the low 8 bytes (4 shorts)

    ; row 1
    movd        xmm0, [rsi+rdx]
    movd        xmm1, [rax+rcx]
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        MMWORD PTR [rdi+rcx*2], xmm0    ; diff + 1 * pitch shorts

    ; row 2
    movd        xmm0, [rsi+rdx*2]
    movd        xmm1, [rax+rcx*2]
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        MMWORD PTR [rdi+rcx*4], xmm0    ; diff + 2 * pitch shorts

    ; row 3: x86 addressing has no *3 scale, so advance z by two rows and
    ; turn rcx into 3 * pitch.
    lea         rsi, [rsi+rdx*2]            ; rsi = z + 2 * src_stride
    lea         rcx, [rcx+rcx*2]            ; rcx = 3 * pitch

    movd        xmm0, [rsi+rdx]             ; z + 3 * src_stride
    movd        xmm1, [rax+rcx]             ; Predictor + 3 * pitch
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        MMWORD PTR [rdi+rcx*2], xmm0    ; diff + 3 * pitch shorts

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
| 72 |
| 73 |
;void vp9_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
;
; Writes a 16x16 block of 16-bit differences: diff = src - pred.
;   src  : 16 rows of 16 bytes, rows `stride` bytes apart
;   pred : 16 rows of 16 bytes, stored back to back (16 bytes per row)
;   diff : 16 rows of 16 shorts, stored back to back (32 bytes per row)
; All loads and stores use movdqa, so src, pred and diff must be 16-byte
; aligned and stride must keep every src row 16-byte aligned.
;
; Register roles:
;   rsi = src, rax = pred, rdi = diff, rdx = stride, rcx = loop counter
;   xmm0-xmm2 = scratch
; Only xmm0-xmm2 are used, so no callee-saved XMM register (xmm6+ under the
; Microsoft x64 ABI) needs to be preserved.

; MBY_SUBTRACT_ROW src_mem, pred_mem, diff_byte_offset
;   Subtracts one row of 16 pixels and stores 16 shorts at
;   [rdi + diff_byte_offset]. Clobbers xmm0-xmm2.
%macro MBY_SUBTRACT_ROW 3
    movdqa      xmm0, XMMWORD PTR %1        ; 16 source bytes
    movdqa      xmm1, XMMWORD PTR %2        ; 16 prediction bytes

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                  ; low byte of each difference

    pxor        xmm1, [GLOBAL(t80)]         ; flip the top bit of both operands so
    pxor        xmm2, [GLOBAL(t80)]         ; the signed compare orders them as unsigned
    pcmpgtb     xmm1, xmm2                  ; 0xff where pred > src, i.e. diff < 0

    movdqa      xmm2, xmm0
    punpcklbw   xmm0, xmm1                  ; differences 0-7  with sign byte on top
    punpckhbw   xmm2, xmm1                  ; differences 8-15 with sign byte on top

    movdqa      XMMWORD PTR [rdi + %3], xmm0
    movdqa      XMMWORD PTR [rdi + %3 + 16], xmm2
%endmacro

global sym(vp9_subtract_mby_sse2)
sym(vp9_subtract_mby_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(1)                 ;src
    mov         rdi, arg(0)                 ;diff

    mov         rax, arg(2)                 ;pred
    movsxd      rdx, dword ptr arg(3)       ;stride

    mov         rcx, 8                      ; 8 iterations, two rows each

.submby_loop:
    MBY_SUBTRACT_ROW [rsi],       [rax],      0
    MBY_SUBTRACT_ROW [rsi + rdx], [rax + 16], 32

    add         rdi, 64                     ; two rows of 16 shorts
    add         rax, 32                     ; two rows of 16 prediction bytes
    lea         rsi, [rsi + rdx*2]          ; two source rows

    sub         rcx, 1
    jnz         .submby_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
| 146 |
| 147 |
;void vp9_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;
; Writes two 8x8 blocks of 16-bit differences (source minus prediction).
;   u : usrc rows `stride` bytes apart, prediction at pred + 256,
;       result at diff + 256 (shorts)
;   v : vsrc rows `stride` bytes apart, prediction at pred + 320,
;       result at diff + 320 (shorts)
; Prediction rows are 8 bytes each and result rows are 8 shorts each, both
; stored back to back. pred and diff are accessed with movdqa and so must be
; 16-byte aligned; the source rows are read with movq and need no alignment.
;
; Register roles:
;   rsi = current source plane, rax = prediction, rdi = diff,
;   rdx = stride, rcx = 3 * stride, xmm0-xmm2 = scratch

; MBUV_SUBTRACT_2ROWS row_a_mem, row_b_mem, pred_mem, diff_byte_offset
;   Packs two 8-pixel source rows into one register, subtracts 16 prediction
;   bytes and stores 16 shorts at [rdi + diff_byte_offset]. Clobbers xmm0-xmm2.
%macro MBUV_SUBTRACT_2ROWS 4
    movq        xmm0, MMWORD PTR %1         ; first source row (8 bytes)
    movq        xmm2, MMWORD PTR %2         ; second source row (8 bytes)
    movdqa      xmm1, XMMWORD PTR %3        ; two prediction rows (16 bytes)
    punpcklqdq  xmm0, xmm2                  ; xmm0 = both source rows

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                  ; low byte of each difference

    pxor        xmm1, [GLOBAL(t80)]         ; flip the top bit of both operands so
    pxor        xmm2, [GLOBAL(t80)]         ; the signed compare orders them as unsigned
    pcmpgtb     xmm1, xmm2                  ; 0xff where pred > src, i.e. diff < 0

    movdqa      xmm2, xmm0
    punpcklbw   xmm0, xmm1                  ; first row  with sign byte on top
    punpckhbw   xmm2, xmm1                  ; second row with sign byte on top

    movdqa      XMMWORD PTR [rdi + %4], xmm0
    movdqa      XMMWORD PTR [rdi + %4 + 16], xmm2
%endmacro

; MBUV_SUBTRACT_PLANE
;   Processes one 8x8 plane. Expects rsi = source, rax = prediction,
;   rdi = diff, rdx = stride, rcx = 3 * stride. Advances rsi by 4 * stride.
%macro MBUV_SUBTRACT_PLANE 0
    ;line 0 1
    MBUV_SUBTRACT_2ROWS [rsi],         [rsi + rdx], [rax],      0
    ;line 2 3
    MBUV_SUBTRACT_2ROWS [rsi + rdx*2], [rsi + rcx], [rax + 16], 32
    ;line 4 5
    lea         rsi, [rsi + rdx*4]
    MBUV_SUBTRACT_2ROWS [rsi],         [rsi + rdx], [rax + 32], 64
    ;line 6 7
    MBUV_SUBTRACT_2ROWS [rsi + rdx*2], [rsi + rcx], [rax + 48], 96
%endmacro

global sym(vp9_subtract_mbuv_sse2)
sym(vp9_subtract_mbuv_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(0)                 ;diff
    mov         rax, arg(3)                 ;pred
    mov         rsi, arg(1)                 ;usrc
    add         rdi, 256*2                  ;diff = diff + 256 (shorts)
    add         rax, 256                    ;pred = pred + 256
    movsxd      rdx, dword ptr arg(4)       ;stride
    lea         rcx, [rdx + rdx*2]          ;3 * stride

    ;u
    MBUV_SUBTRACT_PLANE

    ;v
    mov         rsi, arg(2)                 ;vsrc
    add         rdi, 64*2                   ;diff = diff + 320 (shorts)
    add         rax, 64                     ;pred = pred + 320
    MBUV_SUBTRACT_PLANE

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
| 352 |
SECTION_RODATA
align 16
; 0x80 in each of the 16 byte lanes. XORing packed unsigned bytes with this
; flips their top bit, so a signed byte compare (pcmpgtb) then orders them
; as unsigned values.
t80:
    times 4 dd 0x80808080
OLD | NEW |