Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(658)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_high_intrapred_sse2.asm

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 %include "third_party/x86inc/x86inc.asm" 11 %include "third_party/x86inc/x86inc.asm"
12 12
13 SECTION_RODATA 13 SECTION_RODATA
14 pw_4: times 8 dw 4 14 pw_4: times 8 dw 4
15 pw_8: times 8 dw 8 15 pw_8: times 8 dw 8
16 pw_16: times 4 dd 16 16 pw_16: times 4 dd 16
17 pw_32: times 4 dd 32 17 pw_32: times 4 dd 32
18 18
19 SECTION .text 19 SECTION .text
20 INIT_MMX sse 20 INIT_MMX sse
21 cglobal high_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset 21 cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
22 GET_GOT goffsetq 22 GET_GOT goffsetq
23 23
24 movq m0, [aboveq] 24 movq m0, [aboveq]
25 movq m2, [leftq] 25 movq m2, [leftq]
26 DEFINE_ARGS dst, stride, one 26 DEFINE_ARGS dst, stride, one
27 mov oned, 0x0001 27 mov oned, 0x0001
28 pxor m1, m1 28 pxor m1, m1
29 movd m3, oned 29 movd m3, oned
30 pshufw m3, m3, 0x0 30 pshufw m3, m3, 0x0
31 paddw m0, m2 31 paddw m0, m2
32 pmaddwd m0, m3 32 pmaddwd m0, m3
33 packssdw m0, m1 33 packssdw m0, m1
34 pmaddwd m0, m3 34 pmaddwd m0, m3
35 paddw m0, [GLOBAL(pw_4)] 35 paddw m0, [GLOBAL(pw_4)]
36 psraw m0, 3 36 psraw m0, 3
37 pshufw m0, m0, 0x0 37 pshufw m0, m0, 0x0
38 movq [dstq ], m0 38 movq [dstq ], m0
39 movq [dstq+strideq*2], m0 39 movq [dstq+strideq*2], m0
40 lea dstq, [dstq+strideq*4] 40 lea dstq, [dstq+strideq*4]
41 movq [dstq ], m0 41 movq [dstq ], m0
42 movq [dstq+strideq*2], m0 42 movq [dstq+strideq*2], m0
43 43
44 RESTORE_GOT 44 RESTORE_GOT
45 RET 45 RET
46 46
47 INIT_XMM sse2 47 INIT_XMM sse2
48 cglobal high_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset 48 cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset
49 GET_GOT goffsetq 49 GET_GOT goffsetq
50 50
51 pxor m1, m1 51 pxor m1, m1
52 mova m0, [aboveq] 52 mova m0, [aboveq]
53 mova m2, [leftq] 53 mova m2, [leftq]
54 DEFINE_ARGS dst, stride, stride3, one 54 DEFINE_ARGS dst, stride, stride3, one
55 mov oned, 0x00010001 55 mov oned, 0x00010001
56 lea stride3q, [strideq*3] 56 lea stride3q, [strideq*3]
57 movd m3, oned 57 movd m3, oned
58 pshufd m3, m3, 0x0 58 pshufd m3, m3, 0x0
(...skipping 14 matching lines...) Expand all
73 lea dstq, [dstq+strideq*8] 73 lea dstq, [dstq+strideq*8]
74 mova [dstq ], m0 74 mova [dstq ], m0
75 mova [dstq+strideq*2 ], m0 75 mova [dstq+strideq*2 ], m0
76 mova [dstq+strideq*4 ], m0 76 mova [dstq+strideq*4 ], m0
77 mova [dstq+stride3q*2], m0 77 mova [dstq+stride3q*2], m0
78 78
79 RESTORE_GOT 79 RESTORE_GOT
80 RET 80 RET
81 81
82 INIT_XMM sse2 82 INIT_XMM sse2
83 cglobal high_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset 83 cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
84 GET_GOT goffsetq 84 GET_GOT goffsetq
85 85
86 pxor m1, m1 86 pxor m1, m1
87 mova m0, [aboveq] 87 mova m0, [aboveq]
88 mova m3, [aboveq+16] 88 mova m3, [aboveq+16]
89 mova m2, [leftq] 89 mova m2, [leftq]
90 mova m4, [leftq+16] 90 mova m4, [leftq+16]
91 DEFINE_ARGS dst, stride, stride3, lines4 91 DEFINE_ARGS dst, stride, stride3, lines4
92 lea stride3q, [strideq*3] 92 lea stride3q, [strideq*3]
93 mov lines4d, 4 93 mov lines4d, 4
(...skipping 23 matching lines...) Expand all
117 mova [dstq+stride3q*2+16], m0 117 mova [dstq+stride3q*2+16], m0
118 lea dstq, [dstq+strideq*8] 118 lea dstq, [dstq+strideq*8]
119 dec lines4d 119 dec lines4d
120 jnz .loop 120 jnz .loop
121 121
122 RESTORE_GOT 122 RESTORE_GOT
123 REP_RET 123 REP_RET
124 124
125 %if ARCH_X86_64 125 %if ARCH_X86_64
126 INIT_XMM sse2 126 INIT_XMM sse2
127 cglobal high_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset 127 cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
128 GET_GOT goffsetq 128 GET_GOT goffsetq
129 129
130 pxor m1, m1 130 pxor m1, m1
131 mova m0, [aboveq] 131 mova m0, [aboveq]
132 mova m2, [aboveq+16] 132 mova m2, [aboveq+16]
133 mova m3, [aboveq+32] 133 mova m3, [aboveq+32]
134 mova m4, [aboveq+48] 134 mova m4, [aboveq+48]
135 mova m5, [leftq] 135 mova m5, [leftq]
136 mova m6, [leftq+16] 136 mova m6, [leftq+16]
137 mova m7, [leftq+32] 137 mova m7, [leftq+32]
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
177 mova [dstq+stride3q*2 +48], m0 177 mova [dstq+stride3q*2 +48], m0
178 lea dstq, [dstq+strideq*8] 178 lea dstq, [dstq+strideq*8]
179 dec lines4d 179 dec lines4d
180 jnz .loop 180 jnz .loop
181 181
182 RESTORE_GOT 182 RESTORE_GOT
183 REP_RET 183 REP_RET
184 %endif 184 %endif
185 185
186 INIT_MMX sse 186 INIT_MMX sse
187 cglobal high_v_predictor_4x4, 3, 3, 1, dst, stride, above 187 cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
188 movq m0, [aboveq] 188 movq m0, [aboveq]
189 movq [dstq ], m0 189 movq [dstq ], m0
190 movq [dstq+strideq*2], m0 190 movq [dstq+strideq*2], m0
191 lea dstq, [dstq+strideq*4] 191 lea dstq, [dstq+strideq*4]
192 movq [dstq ], m0 192 movq [dstq ], m0
193 movq [dstq+strideq*2], m0 193 movq [dstq+strideq*2], m0
194 RET 194 RET
195 195
196 INIT_XMM sse2 196 INIT_XMM sse2
197 cglobal high_v_predictor_8x8, 3, 3, 1, dst, stride, above 197 cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above
198 mova m0, [aboveq] 198 mova m0, [aboveq]
199 DEFINE_ARGS dst, stride, stride3 199 DEFINE_ARGS dst, stride, stride3
200 lea stride3q, [strideq*3] 200 lea stride3q, [strideq*3]
201 mova [dstq ], m0 201 mova [dstq ], m0
202 mova [dstq+strideq*2 ], m0 202 mova [dstq+strideq*2 ], m0
203 mova [dstq+strideq*4 ], m0 203 mova [dstq+strideq*4 ], m0
204 mova [dstq+stride3q*2], m0 204 mova [dstq+stride3q*2], m0
205 lea dstq, [dstq+strideq*8] 205 lea dstq, [dstq+strideq*8]
206 mova [dstq ], m0 206 mova [dstq ], m0
207 mova [dstq+strideq*2 ], m0 207 mova [dstq+strideq*2 ], m0
208 mova [dstq+strideq*4 ], m0 208 mova [dstq+strideq*4 ], m0
209 mova [dstq+stride3q*2], m0 209 mova [dstq+stride3q*2], m0
210 RET 210 RET
211 211
212 INIT_XMM sse2 212 INIT_XMM sse2
213 cglobal high_v_predictor_16x16, 3, 4, 2, dst, stride, above 213 cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above
214 mova m0, [aboveq] 214 mova m0, [aboveq]
215 mova m1, [aboveq+16] 215 mova m1, [aboveq+16]
216 DEFINE_ARGS dst, stride, stride3, nlines4 216 DEFINE_ARGS dst, stride, stride3, nlines4
217 lea stride3q, [strideq*3] 217 lea stride3q, [strideq*3]
218 mov nlines4d, 4 218 mov nlines4d, 4
219 .loop: 219 .loop:
220 mova [dstq ], m0 220 mova [dstq ], m0
221 mova [dstq +16], m1 221 mova [dstq +16], m1
222 mova [dstq+strideq*2 ], m0 222 mova [dstq+strideq*2 ], m0
223 mova [dstq+strideq*2 +16], m1 223 mova [dstq+strideq*2 +16], m1
224 mova [dstq+strideq*4 ], m0 224 mova [dstq+strideq*4 ], m0
225 mova [dstq+strideq*4 +16], m1 225 mova [dstq+strideq*4 +16], m1
226 mova [dstq+stride3q*2 ], m0 226 mova [dstq+stride3q*2 ], m0
227 mova [dstq+stride3q*2+16], m1 227 mova [dstq+stride3q*2+16], m1
228 lea dstq, [dstq+strideq*8] 228 lea dstq, [dstq+strideq*8]
229 dec nlines4d 229 dec nlines4d
230 jnz .loop 230 jnz .loop
231 REP_RET 231 REP_RET
232 232
233 INIT_XMM sse2 233 INIT_XMM sse2
234 cglobal high_v_predictor_32x32, 3, 4, 4, dst, stride, above 234 cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
235 mova m0, [aboveq] 235 mova m0, [aboveq]
236 mova m1, [aboveq+16] 236 mova m1, [aboveq+16]
237 mova m2, [aboveq+32] 237 mova m2, [aboveq+32]
238 mova m3, [aboveq+48] 238 mova m3, [aboveq+48]
239 DEFINE_ARGS dst, stride, stride3, nlines4 239 DEFINE_ARGS dst, stride, stride3, nlines4
240 lea stride3q, [strideq*3] 240 lea stride3q, [strideq*3]
241 mov nlines4d, 8 241 mov nlines4d, 8
242 .loop: 242 .loop:
243 mova [dstq ], m0 243 mova [dstq ], m0
244 mova [dstq +16], m1 244 mova [dstq +16], m1
(...skipping 10 matching lines...) Expand all
255 mova [dstq+stride3q*2 ], m0 255 mova [dstq+stride3q*2 ], m0
256 mova [dstq+stride3q*2 +16], m1 256 mova [dstq+stride3q*2 +16], m1
257 mova [dstq+stride3q*2 +32], m2 257 mova [dstq+stride3q*2 +32], m2
258 mova [dstq+stride3q*2 +48], m3 258 mova [dstq+stride3q*2 +48], m3
259 lea dstq, [dstq+strideq*8] 259 lea dstq, [dstq+strideq*8]
260 dec nlines4d 260 dec nlines4d
261 jnz .loop 261 jnz .loop
262 REP_RET 262 REP_RET
263 263
264 INIT_MMX sse 264 INIT_MMX sse
265 cglobal high_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one 265 cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one
266 movd m1, [aboveq-2] 266 movd m1, [aboveq-2]
267 movq m0, [aboveq] 267 movq m0, [aboveq]
268 pshufw m1, m1, 0x0 268 pshufw m1, m1, 0x0
269 ; Get the values to compute the maximum value at this bit depth 269 ; Get the values to compute the maximum value at this bit depth
270 mov oned, 1 270 mov oned, 1
271 movd m3, oned 271 movd m3, oned
272 movd m4, bpsd 272 movd m4, bpsd
273 pshufw m3, m3, 0x0 273 pshufw m3, m3, 0x0
274 DEFINE_ARGS dst, stride, line, left 274 DEFINE_ARGS dst, stride, line, left
275 mov lineq, -2 275 mov lineq, -2
(...skipping 17 matching lines...) Expand all
293 pmaxsw m2, m4 293 pmaxsw m2, m4
294 ;Store the values 294 ;Store the values
295 movq [dstq ], m1 295 movq [dstq ], m1
296 movq [dstq+strideq*2], m2 296 movq [dstq+strideq*2], m2
297 lea dstq, [dstq+strideq*4] 297 lea dstq, [dstq+strideq*4]
298 inc lineq 298 inc lineq
299 jnz .loop 299 jnz .loop
300 REP_RET 300 REP_RET
301 301
302 INIT_XMM sse2 302 INIT_XMM sse2
303 cglobal high_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one 303 cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
304 movd m1, [aboveq-2] 304 movd m1, [aboveq-2]
305 mova m0, [aboveq] 305 mova m0, [aboveq]
306 pshuflw m1, m1, 0x0 306 pshuflw m1, m1, 0x0
307 ; Get the values to compute the maximum value at this bit depth 307 ; Get the values to compute the maximum value at this bit depth
308 mov oned, 1 308 mov oned, 1
309 pxor m3, m3 309 pxor m3, m3
310 pxor m4, m4 310 pxor m4, m4
311 pinsrw m3, oned, 0 311 pinsrw m3, oned, 0
312 pinsrw m4, bpsd, 0 312 pinsrw m4, bpsd, 0
313 pshuflw m3, m3, 0x0 313 pshuflw m3, m3, 0x0
(...skipping 24 matching lines...) Expand all
338 ;Store the values 338 ;Store the values
339 mova [dstq ], m1 339 mova [dstq ], m1
340 mova [dstq+strideq*2], m2 340 mova [dstq+strideq*2], m2
341 lea dstq, [dstq+strideq*4] 341 lea dstq, [dstq+strideq*4]
342 inc lineq 342 inc lineq
343 jnz .loop 343 jnz .loop
344 REP_RET 344 REP_RET
345 345
346 %if ARCH_X86_64 346 %if ARCH_X86_64
347 INIT_XMM sse2 347 INIT_XMM sse2
348 cglobal high_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one 348 cglobal highbd_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one
349 movd m2, [aboveq-2] 349 movd m2, [aboveq-2]
350 mova m0, [aboveq] 350 mova m0, [aboveq]
351 mova m1, [aboveq+16] 351 mova m1, [aboveq+16]
352 pshuflw m2, m2, 0x0 352 pshuflw m2, m2, 0x0
353 ; Get the values to compute the maximum value at this bit depth 353 ; Get the values to compute the maximum value at this bit depth
354 mov oned, 1 354 mov oned, 1
355 pxor m7, m7 355 pxor m7, m7
356 pxor m8, m8 356 pxor m8, m8
357 pinsrw m7, oned, 0 357 pinsrw m7, oned, 0
358 pinsrw m8, bpsd, 0 358 pinsrw m8, bpsd, 0
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
392 mova [dstq ], m4 392 mova [dstq ], m4
393 mova [dstq+strideq*2 ], m5 393 mova [dstq+strideq*2 ], m5
394 mova [dstq +16], m2 394 mova [dstq +16], m2
395 mova [dstq+strideq*2+16], m3 395 mova [dstq+strideq*2+16], m3
396 lea dstq, [dstq+strideq*4] 396 lea dstq, [dstq+strideq*4]
397 inc lineq 397 inc lineq
398 jnz .loop 398 jnz .loop
399 REP_RET 399 REP_RET
400 400
401 INIT_XMM sse2 401 INIT_XMM sse2
402 cglobal high_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one 402 cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
403 movd m0, [aboveq-2] 403 movd m0, [aboveq-2]
404 mova m1, [aboveq] 404 mova m1, [aboveq]
405 mova m2, [aboveq+16] 405 mova m2, [aboveq+16]
406 mova m3, [aboveq+32] 406 mova m3, [aboveq+32]
407 mova m4, [aboveq+48] 407 mova m4, [aboveq+48]
408 pshuflw m0, m0, 0x0 408 pshuflw m0, m0, 0x0
409 ; Get the values to compute the maximum value at this bit depth 409 ; Get the values to compute the maximum value at this bit depth
410 mov oned, 1 410 mov oned, 1
411 pxor m10, m10 411 pxor m10, m10
412 pxor m11, m11 412 pxor m11, m11
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
467 ;Store these values 467 ;Store these values
468 mova [dstq+strideq*2 ], m7 468 mova [dstq+strideq*2 ], m7
469 mova [dstq+strideq*2+16], m8 469 mova [dstq+strideq*2+16], m8
470 mova [dstq+strideq*2+32], m9 470 mova [dstq+strideq*2+32], m9
471 mova [dstq+strideq*2+48], m6 471 mova [dstq+strideq*2+48], m6
472 lea dstq, [dstq+strideq*4] 472 lea dstq, [dstq+strideq*4]
473 inc lineq 473 inc lineq
474 jnz .loop 474 jnz .loop
475 REP_RET 475 REP_RET
476 %endif 476 %endif
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/common/x86/vp9_asm_stubs.c ('k') | source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698