| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 %include "third_party/x86inc/x86inc.asm" | 11 %include "third_party/x86inc/x86inc.asm" |
| 12 | 12 |
| 13 SECTION_RODATA | 13 SECTION_RODATA |
| 14 pw_4: times 8 dw 4 | 14 pw_4: times 8 dw 4 |
| 15 pw_8: times 8 dw 8 | 15 pw_8: times 8 dw 8 |
| 16 pw_16: times 4 dd 16 | 16 pw_16: times 4 dd 16 |
| 17 pw_32: times 4 dd 32 | 17 pw_32: times 4 dd 32 |
| 18 | 18 |
| 19 SECTION .text | 19 SECTION .text |
| 20 INIT_MMX sse | 20 INIT_MMX sse |
| 21 cglobal high_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset | 21 cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset |
| 22 GET_GOT goffsetq | 22 GET_GOT goffsetq |
| 23 | 23 |
| 24 movq m0, [aboveq] | 24 movq m0, [aboveq] |
| 25 movq m2, [leftq] | 25 movq m2, [leftq] |
| 26 DEFINE_ARGS dst, stride, one | 26 DEFINE_ARGS dst, stride, one |
| 27 mov oned, 0x0001 | 27 mov oned, 0x0001 |
| 28 pxor m1, m1 | 28 pxor m1, m1 |
| 29 movd m3, oned | 29 movd m3, oned |
| 30 pshufw m3, m3, 0x0 | 30 pshufw m3, m3, 0x0 |
| 31 paddw m0, m2 | 31 paddw m0, m2 |
| 32 pmaddwd m0, m3 | 32 pmaddwd m0, m3 |
| 33 packssdw m0, m1 | 33 packssdw m0, m1 |
| 34 pmaddwd m0, m3 | 34 pmaddwd m0, m3 |
| 35 paddw m0, [GLOBAL(pw_4)] | 35 paddw m0, [GLOBAL(pw_4)] |
| 36 psraw m0, 3 | 36 psraw m0, 3 |
| 37 pshufw m0, m0, 0x0 | 37 pshufw m0, m0, 0x0 |
| 38 movq [dstq ], m0 | 38 movq [dstq ], m0 |
| 39 movq [dstq+strideq*2], m0 | 39 movq [dstq+strideq*2], m0 |
| 40 lea dstq, [dstq+strideq*4] | 40 lea dstq, [dstq+strideq*4] |
| 41 movq [dstq ], m0 | 41 movq [dstq ], m0 |
| 42 movq [dstq+strideq*2], m0 | 42 movq [dstq+strideq*2], m0 |
| 43 | 43 |
| 44 RESTORE_GOT | 44 RESTORE_GOT |
| 45 RET | 45 RET |
| 46 | 46 |
| 47 INIT_XMM sse2 | 47 INIT_XMM sse2 |
| 48 cglobal high_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset | 48 cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset |
| 49 GET_GOT goffsetq | 49 GET_GOT goffsetq |
| 50 | 50 |
| 51 pxor m1, m1 | 51 pxor m1, m1 |
| 52 mova m0, [aboveq] | 52 mova m0, [aboveq] |
| 53 mova m2, [leftq] | 53 mova m2, [leftq] |
| 54 DEFINE_ARGS dst, stride, stride3, one | 54 DEFINE_ARGS dst, stride, stride3, one |
| 55 mov oned, 0x00010001 | 55 mov oned, 0x00010001 |
| 56 lea stride3q, [strideq*3] | 56 lea stride3q, [strideq*3] |
| 57 movd m3, oned | 57 movd m3, oned |
| 58 pshufd m3, m3, 0x0 | 58 pshufd m3, m3, 0x0 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 73 lea dstq, [dstq+strideq*8] | 73 lea dstq, [dstq+strideq*8] |
| 74 mova [dstq ], m0 | 74 mova [dstq ], m0 |
| 75 mova [dstq+strideq*2 ], m0 | 75 mova [dstq+strideq*2 ], m0 |
| 76 mova [dstq+strideq*4 ], m0 | 76 mova [dstq+strideq*4 ], m0 |
| 77 mova [dstq+stride3q*2], m0 | 77 mova [dstq+stride3q*2], m0 |
| 78 | 78 |
| 79 RESTORE_GOT | 79 RESTORE_GOT |
| 80 RET | 80 RET |
| 81 | 81 |
| 82 INIT_XMM sse2 | 82 INIT_XMM sse2 |
| 83 cglobal high_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset | 83 cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset |
| 84 GET_GOT goffsetq | 84 GET_GOT goffsetq |
| 85 | 85 |
| 86 pxor m1, m1 | 86 pxor m1, m1 |
| 87 mova m0, [aboveq] | 87 mova m0, [aboveq] |
| 88 mova m3, [aboveq+16] | 88 mova m3, [aboveq+16] |
| 89 mova m2, [leftq] | 89 mova m2, [leftq] |
| 90 mova m4, [leftq+16] | 90 mova m4, [leftq+16] |
| 91 DEFINE_ARGS dst, stride, stride3, lines4 | 91 DEFINE_ARGS dst, stride, stride3, lines4 |
| 92 lea stride3q, [strideq*3] | 92 lea stride3q, [strideq*3] |
| 93 mov lines4d, 4 | 93 mov lines4d, 4 |
| (...skipping 23 matching lines...) Expand all Loading... |
| 117 mova [dstq+stride3q*2+16], m0 | 117 mova [dstq+stride3q*2+16], m0 |
| 118 lea dstq, [dstq+strideq*8] | 118 lea dstq, [dstq+strideq*8] |
| 119 dec lines4d | 119 dec lines4d |
| 120 jnz .loop | 120 jnz .loop |
| 121 | 121 |
| 122 RESTORE_GOT | 122 RESTORE_GOT |
| 123 REP_RET | 123 REP_RET |
| 124 | 124 |
| 125 %if ARCH_X86_64 | 125 %if ARCH_X86_64 |
| 126 INIT_XMM sse2 | 126 INIT_XMM sse2 |
| 127 cglobal high_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset | 127 cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset |
| 128 GET_GOT goffsetq | 128 GET_GOT goffsetq |
| 129 | 129 |
| 130 pxor m1, m1 | 130 pxor m1, m1 |
| 131 mova m0, [aboveq] | 131 mova m0, [aboveq] |
| 132 mova m2, [aboveq+16] | 132 mova m2, [aboveq+16] |
| 133 mova m3, [aboveq+32] | 133 mova m3, [aboveq+32] |
| 134 mova m4, [aboveq+48] | 134 mova m4, [aboveq+48] |
| 135 mova m5, [leftq] | 135 mova m5, [leftq] |
| 136 mova m6, [leftq+16] | 136 mova m6, [leftq+16] |
| 137 mova m7, [leftq+32] | 137 mova m7, [leftq+32] |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 177 mova [dstq+stride3q*2 +48], m0 | 177 mova [dstq+stride3q*2 +48], m0 |
| 178 lea dstq, [dstq+strideq*8] | 178 lea dstq, [dstq+strideq*8] |
| 179 dec lines4d | 179 dec lines4d |
| 180 jnz .loop | 180 jnz .loop |
| 181 | 181 |
| 182 RESTORE_GOT | 182 RESTORE_GOT |
| 183 REP_RET | 183 REP_RET |
| 184 %endif | 184 %endif |
| 185 | 185 |
| 186 INIT_MMX sse | 186 INIT_MMX sse |
| 187 cglobal high_v_predictor_4x4, 3, 3, 1, dst, stride, above | 187 cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above |
| 188 movq m0, [aboveq] | 188 movq m0, [aboveq] |
| 189 movq [dstq ], m0 | 189 movq [dstq ], m0 |
| 190 movq [dstq+strideq*2], m0 | 190 movq [dstq+strideq*2], m0 |
| 191 lea dstq, [dstq+strideq*4] | 191 lea dstq, [dstq+strideq*4] |
| 192 movq [dstq ], m0 | 192 movq [dstq ], m0 |
| 193 movq [dstq+strideq*2], m0 | 193 movq [dstq+strideq*2], m0 |
| 194 RET | 194 RET |
| 195 | 195 |
| 196 INIT_XMM sse2 | 196 INIT_XMM sse2 |
| 197 cglobal high_v_predictor_8x8, 3, 3, 1, dst, stride, above | 197 cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above |
| 198 mova m0, [aboveq] | 198 mova m0, [aboveq] |
| 199 DEFINE_ARGS dst, stride, stride3 | 199 DEFINE_ARGS dst, stride, stride3 |
| 200 lea stride3q, [strideq*3] | 200 lea stride3q, [strideq*3] |
| 201 mova [dstq ], m0 | 201 mova [dstq ], m0 |
| 202 mova [dstq+strideq*2 ], m0 | 202 mova [dstq+strideq*2 ], m0 |
| 203 mova [dstq+strideq*4 ], m0 | 203 mova [dstq+strideq*4 ], m0 |
| 204 mova [dstq+stride3q*2], m0 | 204 mova [dstq+stride3q*2], m0 |
| 205 lea dstq, [dstq+strideq*8] | 205 lea dstq, [dstq+strideq*8] |
| 206 mova [dstq ], m0 | 206 mova [dstq ], m0 |
| 207 mova [dstq+strideq*2 ], m0 | 207 mova [dstq+strideq*2 ], m0 |
| 208 mova [dstq+strideq*4 ], m0 | 208 mova [dstq+strideq*4 ], m0 |
| 209 mova [dstq+stride3q*2], m0 | 209 mova [dstq+stride3q*2], m0 |
| 210 RET | 210 RET |
| 211 | 211 |
| 212 INIT_XMM sse2 | 212 INIT_XMM sse2 |
| 213 cglobal high_v_predictor_16x16, 3, 4, 2, dst, stride, above | 213 cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above |
| 214 mova m0, [aboveq] | 214 mova m0, [aboveq] |
| 215 mova m1, [aboveq+16] | 215 mova m1, [aboveq+16] |
| 216 DEFINE_ARGS dst, stride, stride3, nlines4 | 216 DEFINE_ARGS dst, stride, stride3, nlines4 |
| 217 lea stride3q, [strideq*3] | 217 lea stride3q, [strideq*3] |
| 218 mov nlines4d, 4 | 218 mov nlines4d, 4 |
| 219 .loop: | 219 .loop: |
| 220 mova [dstq ], m0 | 220 mova [dstq ], m0 |
| 221 mova [dstq +16], m1 | 221 mova [dstq +16], m1 |
| 222 mova [dstq+strideq*2 ], m0 | 222 mova [dstq+strideq*2 ], m0 |
| 223 mova [dstq+strideq*2 +16], m1 | 223 mova [dstq+strideq*2 +16], m1 |
| 224 mova [dstq+strideq*4 ], m0 | 224 mova [dstq+strideq*4 ], m0 |
| 225 mova [dstq+strideq*4 +16], m1 | 225 mova [dstq+strideq*4 +16], m1 |
| 226 mova [dstq+stride3q*2 ], m0 | 226 mova [dstq+stride3q*2 ], m0 |
| 227 mova [dstq+stride3q*2+16], m1 | 227 mova [dstq+stride3q*2+16], m1 |
| 228 lea dstq, [dstq+strideq*8] | 228 lea dstq, [dstq+strideq*8] |
| 229 dec nlines4d | 229 dec nlines4d |
| 230 jnz .loop | 230 jnz .loop |
| 231 REP_RET | 231 REP_RET |
| 232 | 232 |
| 233 INIT_XMM sse2 | 233 INIT_XMM sse2 |
| 234 cglobal high_v_predictor_32x32, 3, 4, 4, dst, stride, above | 234 cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above |
| 235 mova m0, [aboveq] | 235 mova m0, [aboveq] |
| 236 mova m1, [aboveq+16] | 236 mova m1, [aboveq+16] |
| 237 mova m2, [aboveq+32] | 237 mova m2, [aboveq+32] |
| 238 mova m3, [aboveq+48] | 238 mova m3, [aboveq+48] |
| 239 DEFINE_ARGS dst, stride, stride3, nlines4 | 239 DEFINE_ARGS dst, stride, stride3, nlines4 |
| 240 lea stride3q, [strideq*3] | 240 lea stride3q, [strideq*3] |
| 241 mov nlines4d, 8 | 241 mov nlines4d, 8 |
| 242 .loop: | 242 .loop: |
| 243 mova [dstq ], m0 | 243 mova [dstq ], m0 |
| 244 mova [dstq +16], m1 | 244 mova [dstq +16], m1 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 255 mova [dstq+stride3q*2 ], m0 | 255 mova [dstq+stride3q*2 ], m0 |
| 256 mova [dstq+stride3q*2 +16], m1 | 256 mova [dstq+stride3q*2 +16], m1 |
| 257 mova [dstq+stride3q*2 +32], m2 | 257 mova [dstq+stride3q*2 +32], m2 |
| 258 mova [dstq+stride3q*2 +48], m3 | 258 mova [dstq+stride3q*2 +48], m3 |
| 259 lea dstq, [dstq+strideq*8] | 259 lea dstq, [dstq+strideq*8] |
| 260 dec nlines4d | 260 dec nlines4d |
| 261 jnz .loop | 261 jnz .loop |
| 262 REP_RET | 262 REP_RET |
| 263 | 263 |
| 264 INIT_MMX sse | 264 INIT_MMX sse |
| 265 cglobal high_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one | 265 cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one |
| 266 movd m1, [aboveq-2] | 266 movd m1, [aboveq-2] |
| 267 movq m0, [aboveq] | 267 movq m0, [aboveq] |
| 268 pshufw m1, m1, 0x0 | 268 pshufw m1, m1, 0x0 |
| 269 ; Get the values to compute the maximum value at this bit depth | 269 ; Get the values to compute the maximum value at this bit depth |
| 270 mov oned, 1 | 270 mov oned, 1 |
| 271 movd m3, oned | 271 movd m3, oned |
| 272 movd m4, bpsd | 272 movd m4, bpsd |
| 273 pshufw m3, m3, 0x0 | 273 pshufw m3, m3, 0x0 |
| 274 DEFINE_ARGS dst, stride, line, left | 274 DEFINE_ARGS dst, stride, line, left |
| 275 mov lineq, -2 | 275 mov lineq, -2 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 293 pmaxsw m2, m4 | 293 pmaxsw m2, m4 |
| 294 ;Store the values | 294 ;Store the values |
| 295 movq [dstq ], m1 | 295 movq [dstq ], m1 |
| 296 movq [dstq+strideq*2], m2 | 296 movq [dstq+strideq*2], m2 |
| 297 lea dstq, [dstq+strideq*4] | 297 lea dstq, [dstq+strideq*4] |
| 298 inc lineq | 298 inc lineq |
| 299 jnz .loop | 299 jnz .loop |
| 300 REP_RET | 300 REP_RET |
| 301 | 301 |
| 302 INIT_XMM sse2 | 302 INIT_XMM sse2 |
| 303 cglobal high_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one | 303 cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one |
| 304 movd m1, [aboveq-2] | 304 movd m1, [aboveq-2] |
| 305 mova m0, [aboveq] | 305 mova m0, [aboveq] |
| 306 pshuflw m1, m1, 0x0 | 306 pshuflw m1, m1, 0x0 |
| 307 ; Get the values to compute the maximum value at this bit depth | 307 ; Get the values to compute the maximum value at this bit depth |
| 308 mov oned, 1 | 308 mov oned, 1 |
| 309 pxor m3, m3 | 309 pxor m3, m3 |
| 310 pxor m4, m4 | 310 pxor m4, m4 |
| 311 pinsrw m3, oned, 0 | 311 pinsrw m3, oned, 0 |
| 312 pinsrw m4, bpsd, 0 | 312 pinsrw m4, bpsd, 0 |
| 313 pshuflw m3, m3, 0x0 | 313 pshuflw m3, m3, 0x0 |
| (...skipping 24 matching lines...) Expand all Loading... |
| 338 ;Store the values | 338 ;Store the values |
| 339 mova [dstq ], m1 | 339 mova [dstq ], m1 |
| 340 mova [dstq+strideq*2], m2 | 340 mova [dstq+strideq*2], m2 |
| 341 lea dstq, [dstq+strideq*4] | 341 lea dstq, [dstq+strideq*4] |
| 342 inc lineq | 342 inc lineq |
| 343 jnz .loop | 343 jnz .loop |
| 344 REP_RET | 344 REP_RET |
| 345 | 345 |
| 346 %if ARCH_X86_64 | 346 %if ARCH_X86_64 |
| 347 INIT_XMM sse2 | 347 INIT_XMM sse2 |
| 348 cglobal high_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one | 348 cglobal highbd_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one |
| 349 movd m2, [aboveq-2] | 349 movd m2, [aboveq-2] |
| 350 mova m0, [aboveq] | 350 mova m0, [aboveq] |
| 351 mova m1, [aboveq+16] | 351 mova m1, [aboveq+16] |
| 352 pshuflw m2, m2, 0x0 | 352 pshuflw m2, m2, 0x0 |
| 353 ; Get the values to compute the maximum value at this bit depth | 353 ; Get the values to compute the maximum value at this bit depth |
| 354 mov oned, 1 | 354 mov oned, 1 |
| 355 pxor m7, m7 | 355 pxor m7, m7 |
| 356 pxor m8, m8 | 356 pxor m8, m8 |
| 357 pinsrw m7, oned, 0 | 357 pinsrw m7, oned, 0 |
| 358 pinsrw m8, bpsd, 0 | 358 pinsrw m8, bpsd, 0 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 392 mova [dstq ], m4 | 392 mova [dstq ], m4 |
| 393 mova [dstq+strideq*2 ], m5 | 393 mova [dstq+strideq*2 ], m5 |
| 394 mova [dstq +16], m2 | 394 mova [dstq +16], m2 |
| 395 mova [dstq+strideq*2+16], m3 | 395 mova [dstq+strideq*2+16], m3 |
| 396 lea dstq, [dstq+strideq*4] | 396 lea dstq, [dstq+strideq*4] |
| 397 inc lineq | 397 inc lineq |
| 398 jnz .loop | 398 jnz .loop |
| 399 REP_RET | 399 REP_RET |
| 400 | 400 |
| 401 INIT_XMM sse2 | 401 INIT_XMM sse2 |
| 402 cglobal high_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one | 402 cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one |
| 403 movd m0, [aboveq-2] | 403 movd m0, [aboveq-2] |
| 404 mova m1, [aboveq] | 404 mova m1, [aboveq] |
| 405 mova m2, [aboveq+16] | 405 mova m2, [aboveq+16] |
| 406 mova m3, [aboveq+32] | 406 mova m3, [aboveq+32] |
| 407 mova m4, [aboveq+48] | 407 mova m4, [aboveq+48] |
| 408 pshuflw m0, m0, 0x0 | 408 pshuflw m0, m0, 0x0 |
| 409 ; Get the values to compute the maximum value at this bit depth | 409 ; Get the values to compute the maximum value at this bit depth |
| 410 mov oned, 1 | 410 mov oned, 1 |
| 411 pxor m10, m10 | 411 pxor m10, m10 |
| 412 pxor m11, m11 | 412 pxor m11, m11 |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 467 ;Store these values | 467 ;Store these values |
| 468 mova [dstq+strideq*2 ], m7 | 468 mova [dstq+strideq*2 ], m7 |
| 469 mova [dstq+strideq*2+16], m8 | 469 mova [dstq+strideq*2+16], m8 |
| 470 mova [dstq+strideq*2+32], m9 | 470 mova [dstq+strideq*2+32], m9 |
| 471 mova [dstq+strideq*2+48], m6 | 471 mova [dstq+strideq*2+48], m6 |
| 472 lea dstq, [dstq+strideq*4] | 472 lea dstq, [dstq+strideq*4] |
| 473 inc lineq | 473 inc lineq |
| 474 jnz .loop | 474 jnz .loop |
| 475 REP_RET | 475 REP_RET |
| 476 %endif | 476 %endif |
| OLD | NEW |