OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
11 %include "third_party/x86inc/x86inc.asm" | 11 %include "third_party/x86inc/x86inc.asm" |
12 | 12 |
; Read-only constant tables.
; pw_4 and pw_8 each hold eight 16-bit words (dw) with the value 4 / 8.
; pw_4 is the rounding term the 4x4 DC predictor adds before its shift by 3.
; NOTE(review): pw_16 and pw_32 are declared with dd (four 32-bit dwords
; each), unlike the dw tables that share the same "pw_" prefix. Their users
; are in lines elided from this view -- confirm the intended element width
; before renaming or reusing them with word-sized instructions.
13 SECTION_RODATA | 13 SECTION_RODATA |
14 pw_4: times 8 dw 4 | 14 pw_4: times 8 dw 4 |
15 pw_8: times 8 dw 8 | 15 pw_8: times 8 dw 8 |
16 pw_16: times 4 dd 16 | 16 pw_16: times 4 dd 16 |
17 pw_32: times 4 dd 32 | 17 pw_32: times 4 dd 32 |
18 | 18 |
19 SECTION .text | 19 SECTION .text |
;-----------------------------------------------------------------------
; DC predictor, 4x4 block of 16-bit samples (MMX registers).
; Args (cglobal order): dst, stride, above, left.
; A fifth register, goffset, is reserved for GET_GOT.
; Computes (above[0..3] + left[0..3] + 4) >> 3 and writes that value to
; all four samples of four rows. Rows are strideq*2 bytes apart, which is
; consistent with stride being counted in 16-bit samples -- TODO confirm.
; NOTE(review): the reduction uses signed word/dword arithmetic. It looks
; safe for samples up to 12 bits (8*4095 + 4 < 32768); confirm the
; supported bit depths.
; NOTE(review): MMX registers are used and no emms is visible here;
; presumably the caller clears MMX state -- confirm.
;-----------------------------------------------------------------------
20 INIT_MMX sse | 20 INIT_MMX sse |
21 cglobal high_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset | 21 cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset |
; presumably sets up position-independent addressing for GLOBAL() below
22 GET_GOT goffsetq | 22 GET_GOT goffsetq |
23 | 23 |
; m0 = four 16-bit samples from above, m2 = four 16-bit samples from left
24 movq m0, [aboveq] | 24 movq m0, [aboveq] |
25 movq m2, [leftq] | 25 movq m2, [leftq] |
; above/left are not referenced past this point; the third register is
; re-declared as the scratch name "one"
26 DEFINE_ARGS dst, stride, one | 26 DEFINE_ARGS dst, stride, one |
27 mov oned, 0x0001 | 27 mov oned, 0x0001 |
; m1 = 0, used as the upper half of the pack below
28 pxor m1, m1 | 28 pxor m1, m1 |
; m3 = {1, 1, 1, 1} as words, so pmaddwd acts as a pairwise add
29 movd m3, oned | 29 movd m3, oned |
30 pshufw m3, m3, 0x0 | 30 pshufw m3, m3, 0x0 |
; four words: above[i] + left[i]
31 paddw m0, m2 | 31 paddw m0, m2 |
; two dwords: sums of adjacent word pairs
32 pmaddwd m0, m3 | 32 pmaddwd m0, m3 |
; narrow the two partial sums back to words; words 2 and 3 come from m1 (zero)
33 packssdw m0, m1 | 33 packssdw m0, m1 |
; low dword = total of all eight samples
34 pmaddwd m0, m3 | 34 pmaddwd m0, m3 |
; add the rounding term and divide by 8 (only the low word is kept below)
35 paddw m0, [GLOBAL(pw_4)] | 35 paddw m0, [GLOBAL(pw_4)] |
36 psraw m0, 3 | 36 psraw m0, 3 |
; broadcast the DC value in the low word to all four words
37 pshufw m0, m0, 0x0 | 37 pshufw m0, m0, 0x0 |
; rows 0 and 1
38 movq [dstq ], m0 | 38 movq [dstq ], m0 |
39 movq [dstq+strideq*2], m0 | 39 movq [dstq+strideq*2], m0 |
; advance two rows, then store rows 2 and 3
40 lea dstq, [dstq+strideq*4] | 40 lea dstq, [dstq+strideq*4] |
41 movq [dstq ], m0 | 41 movq [dstq ], m0 |
42 movq [dstq+strideq*2], m0 | 42 movq [dstq+strideq*2], m0 |
43 | 43 |
44 RESTORE_GOT | 44 RESTORE_GOT |
45 RET | 45 RET |
46 | 46 |
47 INIT_XMM sse2 | 47 INIT_XMM sse2 |
48 cglobal high_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset | 48 cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset |
49 GET_GOT goffsetq | 49 GET_GOT goffsetq |
50 | 50 |
51 pxor m1, m1 | 51 pxor m1, m1 |
52 mova m0, [aboveq] | 52 mova m0, [aboveq] |
53 mova m2, [leftq] | 53 mova m2, [leftq] |
54 DEFINE_ARGS dst, stride, stride3, one | 54 DEFINE_ARGS dst, stride, stride3, one |
55 mov oned, 0x00010001 | 55 mov oned, 0x00010001 |
56 lea stride3q, [strideq*3] | 56 lea stride3q, [strideq*3] |
57 movd m3, oned | 57 movd m3, oned |
58 pshufd m3, m3, 0x0 | 58 pshufd m3, m3, 0x0 |
(...skipping 14 matching lines...) Expand all Loading... |
73 lea dstq, [dstq+strideq*8] | 73 lea dstq, [dstq+strideq*8] |
74 mova [dstq ], m0 | 74 mova [dstq ], m0 |
75 mova [dstq+strideq*2 ], m0 | 75 mova [dstq+strideq*2 ], m0 |
76 mova [dstq+strideq*4 ], m0 | 76 mova [dstq+strideq*4 ], m0 |
77 mova [dstq+stride3q*2], m0 | 77 mova [dstq+stride3q*2], m0 |
78 | 78 |
79 RESTORE_GOT | 79 RESTORE_GOT |
80 RET | 80 RET |
81 | 81 |
82 INIT_XMM sse2 | 82 INIT_XMM sse2 |
83 cglobal high_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset | 83 cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset |
84 GET_GOT goffsetq | 84 GET_GOT goffsetq |
85 | 85 |
86 pxor m1, m1 | 86 pxor m1, m1 |
87 mova m0, [aboveq] | 87 mova m0, [aboveq] |
88 mova m3, [aboveq+16] | 88 mova m3, [aboveq+16] |
89 mova m2, [leftq] | 89 mova m2, [leftq] |
90 mova m4, [leftq+16] | 90 mova m4, [leftq+16] |
91 DEFINE_ARGS dst, stride, stride3, lines4 | 91 DEFINE_ARGS dst, stride, stride3, lines4 |
92 lea stride3q, [strideq*3] | 92 lea stride3q, [strideq*3] |
93 mov lines4d, 4 | 93 mov lines4d, 4 |
(...skipping 23 matching lines...) Expand all Loading... |
117 mova [dstq+stride3q*2+16], m0 | 117 mova [dstq+stride3q*2+16], m0 |
118 lea dstq, [dstq+strideq*8] | 118 lea dstq, [dstq+strideq*8] |
119 dec lines4d | 119 dec lines4d |
120 jnz .loop | 120 jnz .loop |
121 | 121 |
122 RESTORE_GOT | 122 RESTORE_GOT |
123 REP_RET | 123 REP_RET |
124 | 124 |
125 %if ARCH_X86_64 | 125 %if ARCH_X86_64 |
126 INIT_XMM sse2 | 126 INIT_XMM sse2 |
127 cglobal high_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset | 127 cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset |
128 GET_GOT goffsetq | 128 GET_GOT goffsetq |
129 | 129 |
130 pxor m1, m1 | 130 pxor m1, m1 |
131 mova m0, [aboveq] | 131 mova m0, [aboveq] |
132 mova m2, [aboveq+16] | 132 mova m2, [aboveq+16] |
133 mova m3, [aboveq+32] | 133 mova m3, [aboveq+32] |
134 mova m4, [aboveq+48] | 134 mova m4, [aboveq+48] |
135 mova m5, [leftq] | 135 mova m5, [leftq] |
136 mova m6, [leftq+16] | 136 mova m6, [leftq+16] |
137 mova m7, [leftq+32] | 137 mova m7, [leftq+32] |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 mova [dstq+stride3q*2 +48], m0 | 177 mova [dstq+stride3q*2 +48], m0 |
178 lea dstq, [dstq+strideq*8] | 178 lea dstq, [dstq+strideq*8] |
179 dec lines4d | 179 dec lines4d |
180 jnz .loop | 180 jnz .loop |
181 | 181 |
182 RESTORE_GOT | 182 RESTORE_GOT |
183 REP_RET | 183 REP_RET |
184 %endif | 184 %endif |
185 | 185 |
;-----------------------------------------------------------------------
; Vertical predictor, 4x4 block (MMX registers).
; Args (cglobal order): dst, stride, above.
; Loads 8 bytes from above and stores them unchanged to four rows that
; are strideq*2 bytes apart. The 8-byte row for a 4-wide block implies
; 16-bit samples; stride is presumably counted in samples -- TODO confirm.
; NOTE(review): MMX registers are used and no emms is visible here;
; presumably the caller clears MMX state -- confirm.
;-----------------------------------------------------------------------
186 INIT_MMX sse | 186 INIT_MMX sse |
187 cglobal high_v_predictor_4x4, 3, 3, 1, dst, stride, above | 187 cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above |
; m0 = the row above the block
188 movq m0, [aboveq] | 188 movq m0, [aboveq] |
; rows 0 and 1
189 movq [dstq ], m0 | 189 movq [dstq ], m0 |
190 movq [dstq+strideq*2], m0 | 190 movq [dstq+strideq*2], m0 |
; advance two rows, then store rows 2 and 3
191 lea dstq, [dstq+strideq*4] | 191 lea dstq, [dstq+strideq*4] |
192 movq [dstq ], m0 | 192 movq [dstq ], m0 |
193 movq [dstq+strideq*2], m0 | 193 movq [dstq+strideq*2], m0 |
194 RET | 194 RET |
195 | 195 |
;-----------------------------------------------------------------------
; Vertical predictor, 8x8 block (XMM registers).
; Args (cglobal order): dst, stride, above.
; Loads 16 bytes from above and stores them unchanged to eight rows that
; are strideq*2 bytes apart (16-byte rows for an 8-wide block, i.e.
; 16-bit samples).
; NOTE(review): mova is x86inc's aligned move, so above and every dst row
; are presumably required to be 16-byte aligned -- confirm with callers.
;-----------------------------------------------------------------------
196 INIT_XMM sse2 | 196 INIT_XMM sse2 |
197 cglobal high_v_predictor_8x8, 3, 3, 1, dst, stride, above | 197 cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above |
; m0 = the row above the block
198 mova m0, [aboveq] | 198 mova m0, [aboveq] |
; above is not referenced past this point; its register becomes stride3
199 DEFINE_ARGS dst, stride, stride3 | 199 DEFINE_ARGS dst, stride, stride3 |
; stride3 = 3 * stride, so stride3q*2 addresses the fourth row of a group
200 lea stride3q, [strideq*3] | 200 lea stride3q, [strideq*3] |
; rows 0-3
201 mova [dstq ], m0 | 201 mova [dstq ], m0 |
202 mova [dstq+strideq*2 ], m0 | 202 mova [dstq+strideq*2 ], m0 |
203 mova [dstq+strideq*4 ], m0 | 203 mova [dstq+strideq*4 ], m0 |
204 mova [dstq+stride3q*2], m0 | 204 mova [dstq+stride3q*2], m0 |
; advance four rows, then store rows 4-7
205 lea dstq, [dstq+strideq*8] | 205 lea dstq, [dstq+strideq*8] |
206 mova [dstq ], m0 | 206 mova [dstq ], m0 |
207 mova [dstq+strideq*2 ], m0 | 207 mova [dstq+strideq*2 ], m0 |
208 mova [dstq+strideq*4 ], m0 | 208 mova [dstq+strideq*4 ], m0 |
209 mova [dstq+stride3q*2], m0 | 209 mova [dstq+stride3q*2], m0 |
210 RET | 210 RET |
211 | 211 |
;-----------------------------------------------------------------------
; Vertical predictor, 16x16 block (XMM registers).
; Args (cglobal order): dst, stride, above; one extra GPR is requested.
; Loads 32 bytes from above (m0, m1) and stores them unchanged to 16 rows
; that are strideq*2 bytes apart. The loop runs 4 times and writes 4 rows
; per pass.
; NOTE(review): mova is x86inc's aligned move, so above and every dst row
; are presumably required to be 16-byte aligned -- confirm with callers.
;-----------------------------------------------------------------------
212 INIT_XMM sse2 | 212 INIT_XMM sse2 |
213 cglobal high_v_predictor_16x16, 3, 4, 2, dst, stride, above | 213 cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above |
; m0 = first 16 bytes of the above row, m1 = next 16 bytes
214 mova m0, [aboveq] | 214 mova m0, [aboveq] |
215 mova m1, [aboveq+16] | 215 mova m1, [aboveq+16] |
; above is not referenced past this point; registers are re-declared as
; stride3 and the loop counter nlines4
216 DEFINE_ARGS dst, stride, stride3, nlines4 | 216 DEFINE_ARGS dst, stride, stride3, nlines4 |
; stride3 = 3 * stride, so stride3q*2 addresses the fourth row of a group
217 lea stride3q, [strideq*3] | 217 lea stride3q, [strideq*3] |
; nlines4 counts groups of four rows: 4 groups = 16 rows
218 mov nlines4d, 4 | 218 mov nlines4d, 4 |
219 .loop: | 219 .loop: |
; four rows per pass, each written as two 16-byte halves
220 mova [dstq ], m0 | 220 mova [dstq ], m0 |
221 mova [dstq +16], m1 | 221 mova [dstq +16], m1 |
222 mova [dstq+strideq*2 ], m0 | 222 mova [dstq+strideq*2 ], m0 |
223 mova [dstq+strideq*2 +16], m1 | 223 mova [dstq+strideq*2 +16], m1 |
224 mova [dstq+strideq*4 ], m0 | 224 mova [dstq+strideq*4 ], m0 |
225 mova [dstq+strideq*4 +16], m1 | 225 mova [dstq+strideq*4 +16], m1 |
226 mova [dstq+stride3q*2 ], m0 | 226 mova [dstq+stride3q*2 ], m0 |
227 mova [dstq+stride3q*2+16], m1 | 227 mova [dstq+stride3q*2+16], m1 |
; advance dst by four rows and repeat until the counter reaches zero
228 lea dstq, [dstq+strideq*8] | 228 lea dstq, [dstq+strideq*8] |
229 dec nlines4d | 229 dec nlines4d |
230 jnz .loop | 230 jnz .loop |
231 REP_RET | 231 REP_RET |
232 | 232 |
233 INIT_XMM sse2 | 233 INIT_XMM sse2 |
234 cglobal high_v_predictor_32x32, 3, 4, 4, dst, stride, above | 234 cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above |
235 mova m0, [aboveq] | 235 mova m0, [aboveq] |
236 mova m1, [aboveq+16] | 236 mova m1, [aboveq+16] |
237 mova m2, [aboveq+32] | 237 mova m2, [aboveq+32] |
238 mova m3, [aboveq+48] | 238 mova m3, [aboveq+48] |
239 DEFINE_ARGS dst, stride, stride3, nlines4 | 239 DEFINE_ARGS dst, stride, stride3, nlines4 |
240 lea stride3q, [strideq*3] | 240 lea stride3q, [strideq*3] |
241 mov nlines4d, 8 | 241 mov nlines4d, 8 |
242 .loop: | 242 .loop: |
243 mova [dstq ], m0 | 243 mova [dstq ], m0 |
244 mova [dstq +16], m1 | 244 mova [dstq +16], m1 |
(...skipping 10 matching lines...) Expand all Loading... |
255 mova [dstq+stride3q*2 ], m0 | 255 mova [dstq+stride3q*2 ], m0 |
256 mova [dstq+stride3q*2 +16], m1 | 256 mova [dstq+stride3q*2 +16], m1 |
257 mova [dstq+stride3q*2 +32], m2 | 257 mova [dstq+stride3q*2 +32], m2 |
258 mova [dstq+stride3q*2 +48], m3 | 258 mova [dstq+stride3q*2 +48], m3 |
259 lea dstq, [dstq+strideq*8] | 259 lea dstq, [dstq+strideq*8] |
260 dec nlines4d | 260 dec nlines4d |
261 jnz .loop | 261 jnz .loop |
262 REP_RET | 262 REP_RET |
263 | 263 |
264 INIT_MMX sse | 264 INIT_MMX sse |
265 cglobal high_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one | 265 cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one |
266 movd m1, [aboveq-2] | 266 movd m1, [aboveq-2] |
267 movq m0, [aboveq] | 267 movq m0, [aboveq] |
268 pshufw m1, m1, 0x0 | 268 pshufw m1, m1, 0x0 |
269 ; Get the values to compute the maximum value at this bit depth | 269 ; Get the values to compute the maximum value at this bit depth |
270 mov oned, 1 | 270 mov oned, 1 |
271 movd m3, oned | 271 movd m3, oned |
272 movd m4, bpsd | 272 movd m4, bpsd |
273 pshufw m3, m3, 0x0 | 273 pshufw m3, m3, 0x0 |
274 DEFINE_ARGS dst, stride, line, left | 274 DEFINE_ARGS dst, stride, line, left |
275 mov lineq, -2 | 275 mov lineq, -2 |
(...skipping 17 matching lines...) Expand all Loading... |
293 pmaxsw m2, m4 | 293 pmaxsw m2, m4 |
294 ;Store the values | 294 ;Store the values |
295 movq [dstq ], m1 | 295 movq [dstq ], m1 |
296 movq [dstq+strideq*2], m2 | 296 movq [dstq+strideq*2], m2 |
297 lea dstq, [dstq+strideq*4] | 297 lea dstq, [dstq+strideq*4] |
298 inc lineq | 298 inc lineq |
299 jnz .loop | 299 jnz .loop |
300 REP_RET | 300 REP_RET |
301 | 301 |
302 INIT_XMM sse2 | 302 INIT_XMM sse2 |
303 cglobal high_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one | 303 cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one |
304 movd m1, [aboveq-2] | 304 movd m1, [aboveq-2] |
305 mova m0, [aboveq] | 305 mova m0, [aboveq] |
306 pshuflw m1, m1, 0x0 | 306 pshuflw m1, m1, 0x0 |
307 ; Get the values to compute the maximum value at this bit depth | 307 ; Get the values to compute the maximum value at this bit depth |
308 mov oned, 1 | 308 mov oned, 1 |
309 pxor m3, m3 | 309 pxor m3, m3 |
310 pxor m4, m4 | 310 pxor m4, m4 |
311 pinsrw m3, oned, 0 | 311 pinsrw m3, oned, 0 |
312 pinsrw m4, bpsd, 0 | 312 pinsrw m4, bpsd, 0 |
313 pshuflw m3, m3, 0x0 | 313 pshuflw m3, m3, 0x0 |
(...skipping 24 matching lines...) Expand all Loading... |
338 ;Store the values | 338 ;Store the values |
339 mova [dstq ], m1 | 339 mova [dstq ], m1 |
340 mova [dstq+strideq*2], m2 | 340 mova [dstq+strideq*2], m2 |
341 lea dstq, [dstq+strideq*4] | 341 lea dstq, [dstq+strideq*4] |
342 inc lineq | 342 inc lineq |
343 jnz .loop | 343 jnz .loop |
344 REP_RET | 344 REP_RET |
345 | 345 |
346 %if ARCH_X86_64 | 346 %if ARCH_X86_64 |
347 INIT_XMM sse2 | 347 INIT_XMM sse2 |
348 cglobal high_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one | 348 cglobal highbd_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one |
349 movd m2, [aboveq-2] | 349 movd m2, [aboveq-2] |
350 mova m0, [aboveq] | 350 mova m0, [aboveq] |
351 mova m1, [aboveq+16] | 351 mova m1, [aboveq+16] |
352 pshuflw m2, m2, 0x0 | 352 pshuflw m2, m2, 0x0 |
353 ; Get the values to compute the maximum value at this bit depth | 353 ; Get the values to compute the maximum value at this bit depth |
354 mov oned, 1 | 354 mov oned, 1 |
355 pxor m7, m7 | 355 pxor m7, m7 |
356 pxor m8, m8 | 356 pxor m8, m8 |
357 pinsrw m7, oned, 0 | 357 pinsrw m7, oned, 0 |
358 pinsrw m8, bpsd, 0 | 358 pinsrw m8, bpsd, 0 |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
392 mova [dstq ], m4 | 392 mova [dstq ], m4 |
393 mova [dstq+strideq*2 ], m5 | 393 mova [dstq+strideq*2 ], m5 |
394 mova [dstq +16], m2 | 394 mova [dstq +16], m2 |
395 mova [dstq+strideq*2+16], m3 | 395 mova [dstq+strideq*2+16], m3 |
396 lea dstq, [dstq+strideq*4] | 396 lea dstq, [dstq+strideq*4] |
397 inc lineq | 397 inc lineq |
398 jnz .loop | 398 jnz .loop |
399 REP_RET | 399 REP_RET |
400 | 400 |
401 INIT_XMM sse2 | 401 INIT_XMM sse2 |
402 cglobal high_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one | 402 cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one |
403 movd m0, [aboveq-2] | 403 movd m0, [aboveq-2] |
404 mova m1, [aboveq] | 404 mova m1, [aboveq] |
405 mova m2, [aboveq+16] | 405 mova m2, [aboveq+16] |
406 mova m3, [aboveq+32] | 406 mova m3, [aboveq+32] |
407 mova m4, [aboveq+48] | 407 mova m4, [aboveq+48] |
408 pshuflw m0, m0, 0x0 | 408 pshuflw m0, m0, 0x0 |
409 ; Get the values to compute the maximum value at this bit depth | 409 ; Get the values to compute the maximum value at this bit depth |
410 mov oned, 1 | 410 mov oned, 1 |
411 pxor m10, m10 | 411 pxor m10, m10 |
412 pxor m11, m11 | 412 pxor m11, m11 |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
467 ;Store these values | 467 ;Store these values |
468 mova [dstq+strideq*2 ], m7 | 468 mova [dstq+strideq*2 ], m7 |
469 mova [dstq+strideq*2+16], m8 | 469 mova [dstq+strideq*2+16], m8 |
470 mova [dstq+strideq*2+32], m9 | 470 mova [dstq+strideq*2+32], m9 |
471 mova [dstq+strideq*2+48], m6 | 471 mova [dstq+strideq*2+48], m6 |
472 lea dstq, [dstq+strideq*4] | 472 lea dstq, [dstq+strideq*4] |
473 inc lineq | 473 inc lineq |
474 jnz .loop | 474 jnz .loop |
475 REP_RET | 475 REP_RET |
476 %endif | 476 %endif |
OLD | NEW |