Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(675)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm

Issue 168343002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: libvpx: Pull from upstream Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11 %include "vpx_ports/x86_abi_support.asm"
12
;-----------------------------------------------------------------------
; GET_PARAM_4
; Loads the arguments and constants used by the 4-pixel-wide kernels.
;
; Reads:  arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Leaves: rsi  = src_ptr
;         rdi  = output_ptr
;         rax  = pixels_per_line (read as a 32-bit value)
;         rdx  = out_pitch       (read as a 32-bit value)
;         rcx  = output_height   (read as a 32-bit value)
;         xmm2 = 64 in every 16-bit lane (rounding term for ">> 7")
;         xmm3 = low four 16-bit lanes each hold the byte pair (k3, k4)
; The filter is fetched with movdqa, so the filter pointer has to be
; 16-byte aligned.
; NOTE(review): packsswb saturates at +127, so a tap equal to 128 would
; not survive the narrowing -- presumably callers never pass one; confirm.
;-----------------------------------------------------------------------
%macro GET_PARAM_4 0
    ; rounding constant: 0x0040 (= 64) in both words of the low dword
    mov         rcx, 0x00400040
    movq        xmm2, rcx
    pshufd      xmm2, xmm2, 0               ;broadcast dword 0 to all four dwords

    ; filter taps: keep only taps 3 and 4, narrowed to bytes
    mov         rdx, arg(5)                 ;filter ptr
    movdqa      xmm3, [rdx]                 ;16 bytes, handled as eight 16-bit taps
    psrldq      xmm3, 6                     ;shift out taps 0-2: word 0 = k3, word 1 = k4
    packsswb    xmm3, xmm3                  ;words -> signed bytes: byte 0 = k3, byte 1 = k4
    pshuflw     xmm3, xmm3, 0b              ;copy the (k3, k4) pair into the low 4 words

    ; pointers, strides and row count (rdx and rcx are reused from here on)
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height
%endm
31
;-----------------------------------------------------------------------
; APPLY_FILTER_4 avg
; Filters one row of 4 pixels, stores it, and steps to the next row.
;
; In:     xmm0 = source bytes for the first tap
;         xmm1 = source bytes for the second tap
;         xmm3 = (k3, k4) byte pairs, xmm2 = rounding words
;         rsi / rdi = current source / destination row
;         rax / rdx = source / destination stride
;         rcx = rows remaining
; %1:     non-zero -> average the result with the 4 bytes already at [rdi]
; Out:    4 bytes written at [rdi]; rsi and rdi advanced; rcx decremented.
;         "dec rcx" is the last instruction, so ZF reflects the new rcx for
;         the "jnz" that follows each use of this macro.
; Clobb:  xmm0, xmm1 (only when %1 is non-zero), flags
;-----------------------------------------------------------------------
%macro APPLY_FILTER_4 1
    punpcklbw   xmm0, xmm1                  ;interleave bytes: a0 b0 a1 b1 ...
    pmaddubsw   xmm0, xmm3                  ;a*k3 + b*k4 in each 16-bit lane
    paddsw      xmm0, xmm2                  ;add the rounding term
    psraw       xmm0, 7                     ;arithmetic shift right by 7
    packuswb    xmm0, xmm0                  ;narrow to bytes with unsigned saturation

%if %1
    movd        xmm1, [rdi]                 ;current destination bytes
    pavgb       xmm0, xmm1                  ;rounded byte average with them
%endif
    movd        [rdi], xmm0                 ;write 4 output bytes

    lea         rdi, [rdi + rdx]            ;next destination row
    lea         rsi, [rsi + rax]            ;next source row
    dec         rcx                         ;one row done; sets ZF for the caller
%endm
49
;-----------------------------------------------------------------------
; GET_PARAM
; Loads the arguments and constants used by the 8- and 16-pixel-wide
; kernels.
;
; Reads:  arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Leaves: rsi  = src_ptr
;         rdi  = output_ptr
;         rax  = pixels_per_line (read as a 32-bit value)
;         rdx  = out_pitch       (read as a 32-bit value)
;         rcx  = output_height   (read as a 32-bit value)
;         xmm6 = 64 in every 16-bit lane (rounding term for ">> 7")
;         xmm7 = the byte pair (k3, k4) in all eight 16-bit lanes
; The filter is fetched with movdqa, so the filter pointer has to be
; 16-byte aligned.
; NOTE(review): packsswb saturates at +127, so a tap equal to 128 would
; not survive the narrowing -- presumably callers never pass one; confirm.
;-----------------------------------------------------------------------
%macro GET_PARAM 0
    ; rounding constant: 0x0040 (= 64) in both words of the low dword
    mov         rcx, 0x00400040
    movq        xmm6, rcx
    pshufd      xmm6, xmm6, 0               ;broadcast dword 0 to all four dwords

    ; filter taps: keep only taps 3 and 4, narrowed to bytes
    mov         rdx, arg(5)                 ;filter ptr
    movdqa      xmm7, [rdx]                 ;16 bytes, handled as eight 16-bit taps
    psrldq      xmm7, 6                     ;shift out taps 0-2: word 0 = k3, word 1 = k4
    packsswb    xmm7, xmm7                  ;words -> signed bytes: byte 0 = k3, byte 1 = k4
    pshuflw     xmm7, xmm7, 0b              ;copy the (k3, k4) pair into the low 4 words
    punpcklwd   xmm7, xmm7                  ;spread those 4 words over all 8 words

    ; pointers, strides and row count (rdx and rcx are reused from here on)
    mov         rsi, arg(0)                 ;src_ptr
    mov         rdi, arg(2)                 ;output_ptr
    movsxd      rax, DWORD PTR arg(1)       ;pixels_per_line
    movsxd      rdx, DWORD PTR arg(3)       ;out_pitch
    movsxd      rcx, DWORD PTR arg(4)       ;output_height
%endm
69
;-----------------------------------------------------------------------
; APPLY_FILTER_8 avg
; Filters one row of 8 pixels, stores it, and steps to the next row.
;
; In:     xmm0 = source bytes for the first tap  (low 8 bytes used)
;         xmm1 = source bytes for the second tap (low 8 bytes used)
;         xmm7 = (k3, k4) byte pairs, xmm6 = rounding words
;         rsi / rdi = current source / destination row
;         rax / rdx = source / destination stride
;         rcx = rows remaining
; %1:     non-zero -> average the result with the 8 bytes already at [rdi]
; Out:    8 bytes written at [rdi]; rsi and rdi advanced; rcx decremented.
;         "dec rcx" is the last instruction, so ZF reflects the new rcx for
;         the "jnz" that follows each use of this macro.
; Clobb:  xmm0, xmm1 (only when %1 is non-zero), flags
;-----------------------------------------------------------------------
%macro APPLY_FILTER_8 1
    punpcklbw   xmm0, xmm1                  ;interleave bytes: a0 b0 a1 b1 ...
    pmaddubsw   xmm0, xmm7                  ;a*k3 + b*k4 in each 16-bit lane
    paddsw      xmm0, xmm6                  ;add the rounding term
    psraw       xmm0, 7                     ;arithmetic shift right by 7
    packuswb    xmm0, xmm0                  ;narrow to bytes with unsigned saturation

%if %1
    movq        xmm1, [rdi]                 ;current destination bytes
    pavgb       xmm0, xmm1                  ;rounded byte average with them
%endif
    movq        [rdi], xmm0                 ;write 8 output bytes

    lea         rdi, [rdi + rdx]            ;next destination row
    lea         rsi, [rsi + rax]            ;next source row
    dec         rcx                         ;one row done; sets ZF for the caller
%endm
88
;-----------------------------------------------------------------------
; APPLY_FILTER_16 avg
; Filters one row of 16 pixels, stores it, and steps to the next row.
;
; In:     xmm0 = 16 source bytes for the first tap
;         xmm2 = a second copy of those same 16 bytes
;         xmm1 = 16 source bytes for the second tap
;         xmm7 = (k3, k4) byte pairs, xmm6 = rounding words
;         rsi / rdi = current source / destination row
;         rax / rdx = source / destination stride
;         rcx = rows remaining
; %1:     non-zero -> average the result with the 16 bytes already at [rdi]
; Out:    16 bytes written at [rdi] (unaligned store); rsi and rdi
;         advanced; rcx decremented. "dec rcx" is the last instruction, so
;         ZF reflects the new rcx for the "jnz" that follows each use.
; Clobb:  xmm0, xmm2, xmm1 (only when %1 is non-zero), flags
;-----------------------------------------------------------------------
%macro APPLY_FILTER_16 1
    ; pixels 0-7
    punpcklbw   xmm0, xmm1                  ;interleave the low 8 bytes of each input
    pmaddubsw   xmm0, xmm7                  ;a*k3 + b*k4 in each 16-bit lane
    paddsw      xmm0, xmm6                  ;add the rounding term
    psraw       xmm0, 7                     ;arithmetic shift right by 7

    ; pixels 8-15
    punpckhbw   xmm2, xmm1                  ;interleave the high 8 bytes of each input
    pmaddubsw   xmm2, xmm7
    paddsw      xmm2, xmm6
    psraw       xmm2, 7

    packuswb    xmm0, xmm2                  ;both halves -> 16 bytes, unsigned saturation

%if %1
    movdqu      xmm1, [rdi]                 ;current destination bytes
    pavgb       xmm0, xmm1                  ;rounded byte average with them
%endif
    movdqu      [rdi], xmm0                 ;write 16 output bytes

    lea         rdi, [rdi + rdx]            ;next destination row
    lea         rsi, [rsi + rax]            ;next source row
    dec         rcx                         ;one row done; sets ZF for the caller
%endm
111
;-----------------------------------------------------------------------
; vp9_filter_block1d4_v2_ssse3
; 2-tap filter over rows of 4 pixels. Each output byte combines the
; source byte at [src + x] with the byte pixels_per_line further on,
; weighted by filter taps 3 and 4, rounded and shifted right by 7.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   only xmm0-xmm3 are touched; rsi/rdi are saved and restored.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d4_v2_ssse3) PRIVATE
sym(vp9_filter_block1d4_v2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM_4
.next_row:
    movd        xmm1, [rsi + rax]           ;second tap: 4 bytes one stride further
    movd        xmm0, [rsi]                 ;first tap: 4 bytes of the current row

    ; filter and store without averaging; leaves ZF from "dec rcx"
    APPLY_FILTER_4 0
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
135
;-----------------------------------------------------------------------
; vp9_filter_block1d8_v2_ssse3
; 2-tap filter over rows of 8 pixels. Each output byte combines the
; source byte at [src + x] with the byte pixels_per_line further on,
; weighted by filter taps 3 and 4, rounded and shifted right by 7.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0, xmm1, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d8_v2_ssse3) PRIVATE
sym(vp9_filter_block1d8_v2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movq        xmm1, [rsi + rax]           ;second tap: 8 bytes one stride further
    movq        xmm0, [rsi]                 ;first tap: 8 bytes of the current row

    ; filter and store without averaging; leaves ZF from "dec rcx"
    APPLY_FILTER_8 0
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
161
;-----------------------------------------------------------------------
; vp9_filter_block1d16_v2_ssse3
; 2-tap filter over rows of 16 pixels. Each output byte combines the
; source byte at [src + x] with the byte pixels_per_line further on,
; weighted by filter taps 3 and 4, rounded and shifted right by 7.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0-xmm2, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d16_v2_ssse3) PRIVATE
sym(vp9_filter_block1d16_v2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movdqu      xmm0, [rsi]                 ;first tap: 16 bytes of the current row
    movdqa      xmm2, xmm0                  ;second copy for the high-half interleave
    movdqu      xmm1, [rsi + rax]           ;second tap: 16 bytes one stride further

    ; filter and store without averaging; leaves ZF from "dec rcx"
    APPLY_FILTER_16 0
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
188
;-----------------------------------------------------------------------
; vp9_filter_block1d4_v2_avg_ssse3
; 2-tap filter over rows of 4 pixels, averaged into the destination.
; Each filtered byte combines the source byte at [src + x] with the byte
; pixels_per_line further on (taps 3 and 4, rounded, >> 7) and is then
; byte-averaged (pavgb) with what is already stored at the output.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   only xmm0-xmm3 are touched; rsi/rdi are saved and restored.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d4_v2_avg_ssse3) PRIVATE
sym(vp9_filter_block1d4_v2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM_4
.next_row:
    movd        xmm1, [rsi + rax]           ;second tap: 4 bytes one stride further
    movd        xmm0, [rsi]                 ;first tap: 4 bytes of the current row

    ; filter, average with the destination, store; leaves ZF from "dec rcx"
    APPLY_FILTER_4 1
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
212
;-----------------------------------------------------------------------
; vp9_filter_block1d8_v2_avg_ssse3
; 2-tap filter over rows of 8 pixels, averaged into the destination.
; Each filtered byte combines the source byte at [src + x] with the byte
; pixels_per_line further on (taps 3 and 4, rounded, >> 7) and is then
; byte-averaged (pavgb) with what is already stored at the output.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0, xmm1, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d8_v2_avg_ssse3) PRIVATE
sym(vp9_filter_block1d8_v2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movq        xmm1, [rsi + rax]           ;second tap: 8 bytes one stride further
    movq        xmm0, [rsi]                 ;first tap: 8 bytes of the current row

    ; filter, average with the destination, store; leaves ZF from "dec rcx"
    APPLY_FILTER_8 1
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
238
;-----------------------------------------------------------------------
; vp9_filter_block1d16_v2_avg_ssse3
; 2-tap filter over rows of 16 pixels, averaged into the destination.
; Each filtered byte combines the source byte at [src + x] with the byte
; pixels_per_line further on (taps 3 and 4, rounded, >> 7) and is then
; byte-averaged (pavgb) with what is already stored at the output.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0-xmm2, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d16_v2_avg_ssse3) PRIVATE
sym(vp9_filter_block1d16_v2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movdqu      xmm0, [rsi]                 ;first tap: 16 bytes of the current row
    movdqa      xmm2, xmm0                  ;second copy for the high-half interleave
    movdqu      xmm1, [rsi + rax]           ;second tap: 16 bytes one stride further

    ; filter, average with the destination, store; leaves ZF from "dec rcx"
    APPLY_FILTER_16 1
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
265
;-----------------------------------------------------------------------
; vp9_filter_block1d4_h2_ssse3
; 2-tap filter over rows of 4 pixels. Each output byte combines the
; source byte at [src + x] with its neighbour at [src + x + 1], weighted
; by filter taps 3 and 4, rounded and shifted right by 7.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   only xmm0-xmm3 are touched; rsi/rdi are saved and restored.
; Reads:  exactly 5 source bytes per row ([rsi] .. [rsi + 4]). The two
;         movd loads replace a single 16-byte load plus byte shift, so
;         nothing beyond the bytes that feed the 4 outputs is touched.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d4_h2_ssse3) PRIVATE
sym(vp9_filter_block1d4_h2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM_4
.next_row:
    movd        xmm0, [rsi]                 ;first tap: pixels x .. x+3
    movd        xmm1, [rsi + 1]             ;second tap: pixels x+1 .. x+4

    ; filter and store without averaging; leaves ZF from "dec rcx"
    APPLY_FILTER_4 0
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
290
;-----------------------------------------------------------------------
; vp9_filter_block1d8_h2_ssse3
; 2-tap filter over rows of 8 pixels. Each output byte combines the
; source byte at [src + x] with its neighbour at [src + x + 1], weighted
; by filter taps 3 and 4, rounded and shifted right by 7.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0, xmm1, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; Reads:  exactly 9 source bytes per row ([rsi] .. [rsi + 8]). The two
;         movq loads replace a single 16-byte load plus byte shift, so
;         nothing beyond the bytes that feed the 8 outputs is touched.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d8_h2_ssse3) PRIVATE
sym(vp9_filter_block1d8_h2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movq        xmm0, [rsi]                 ;first tap: pixels x .. x+7
    movq        xmm1, [rsi + 1]             ;second tap: pixels x+1 .. x+8

    ; filter and store without averaging; leaves ZF from "dec rcx"
    APPLY_FILTER_8 0
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
317
;-----------------------------------------------------------------------
; vp9_filter_block1d16_h2_ssse3
; 2-tap filter over rows of 16 pixels. Each output byte combines the
; source byte at [src + x] with its neighbour at [src + x + 1], weighted
; by filter taps 3 and 4, rounded and shifted right by 7.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0-xmm2, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; Reads:  17 source bytes per row ([rsi] .. [rsi + 16]).
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d16_h2_ssse3) PRIVATE
sym(vp9_filter_block1d16_h2_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movdqu      xmm0, [rsi]                 ;first tap: pixels x .. x+15
    movdqa      xmm2, xmm0                  ;second copy for the high-half interleave
    movdqu      xmm1, [rsi + 1]             ;second tap: pixels x+1 .. x+16

    ; filter and store without averaging; leaves ZF from "dec rcx"
    APPLY_FILTER_16 0
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
344
;-----------------------------------------------------------------------
; vp9_filter_block1d4_h2_avg_ssse3
; 2-tap filter over rows of 4 pixels, averaged into the destination.
; Each filtered byte combines the source byte at [src + x] with its
; neighbour at [src + x + 1] (taps 3 and 4, rounded, >> 7) and is then
; byte-averaged (pavgb) with what is already stored at the output.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   only xmm0-xmm3 are touched; rsi/rdi are saved and restored.
; Reads:  exactly 5 source bytes per row ([rsi] .. [rsi + 4]). The two
;         movd loads replace a single 16-byte load plus byte shift, so
;         nothing beyond the bytes that feed the 4 outputs is touched.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d4_h2_avg_ssse3) PRIVATE
sym(vp9_filter_block1d4_h2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM_4
.next_row:
    movd        xmm0, [rsi]                 ;first tap: pixels x .. x+3
    movd        xmm1, [rsi + 1]             ;second tap: pixels x+1 .. x+4

    ; filter, average with the destination, store; leaves ZF from "dec rcx"
    APPLY_FILTER_4 1
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
369
;-----------------------------------------------------------------------
; vp9_filter_block1d8_h2_avg_ssse3
; 2-tap filter over rows of 8 pixels, averaged into the destination.
; Each filtered byte combines the source byte at [src + x] with its
; neighbour at [src + x + 1] (taps 3 and 4, rounded, >> 7) and is then
; byte-averaged (pavgb) with what is already stored at the output.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0, xmm1, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; Reads:  exactly 9 source bytes per row ([rsi] .. [rsi + 8]). The two
;         movq loads replace a single 16-byte load plus byte shift, so
;         nothing beyond the bytes that feed the 8 outputs is touched.
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d8_h2_avg_ssse3) PRIVATE
sym(vp9_filter_block1d8_h2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movq        xmm0, [rsi]                 ;first tap: pixels x .. x+7
    movq        xmm1, [rsi + 1]             ;second tap: pixels x+1 .. x+8

    ; filter, average with the destination, store; leaves ZF from "dec rcx"
    APPLY_FILTER_8 1
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
396
;-----------------------------------------------------------------------
; vp9_filter_block1d16_h2_avg_ssse3
; 2-tap filter over rows of 16 pixels, averaged into the destination.
; Each filtered byte combines the source byte at [src + x] with its
; neighbour at [src + x + 1] (taps 3 and 4, rounded, >> 7) and is then
; byte-averaged (pavgb) with what is already stored at the output.
;
; Args:   arg(0) src_ptr, arg(1) pixels_per_line, arg(2) output_ptr,
;         arg(3) out_pitch, arg(4) output_height, arg(5) filter ptr
; Regs:   xmm0-xmm2, xmm6, xmm7 are used. SAVE_XMM 7 / RESTORE_XMM come
;         from x86_abi_support.asm -- presumably they preserve xmm6-xmm7
;         where the ABI requires it; confirm there.
; Reads:  17 source bytes per row ([rsi] .. [rsi + 16]).
; NOTE(review): the loop is dec/jnz with no test on entry, so
; output_height is presumably always >= 1 -- confirm against callers.
;-----------------------------------------------------------------------
global sym(vp9_filter_block1d16_h2_avg_ssse3) PRIVATE
sym(vp9_filter_block1d16_h2_avg_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; prolog done

    GET_PARAM
.next_row:
    movdqu      xmm0, [rsi]                 ;first tap: pixels x .. x+15
    movdqa      xmm2, xmm0                  ;second copy for the high-half interleave
    movdqu      xmm1, [rsi + 1]             ;second tap: pixels x+1 .. x+16

    ; filter, average with the destination, store; leaves ZF from "dec rcx"
    APPLY_FILTER_16 1
    jnz         .next_row

    ; epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm ('k') | source/libvpx/vp9/decoder/vp9_decodeframe.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698