Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(324)

Side by Side Diff: source/libvpx/vp8/common/x86/variance_impl_mmx.asm

Issue 1154153005: Rename variance files (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « libvpx_srcs_x86_intrinsics.gypi ('k') | source/libvpx/vp8/common/x86/variance_mmx.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
13
14 %define mmx_filter_shift 7 ; right-shift applied after every filter pass (divide by 128); paired with the +64 rounding constant mmx_bi_rd
15 ; Two-pass (horizontal, then vertical) 2-tap filter of a 4x4 block at ref_ptr, followed by the sum and sum of squares of (filtered ref - src).
16 ;void vp8_filter_block2d_bil4x4_var_mmx
17 ;(
18 ; unsigned char *ref_ptr,         block that gets filtered; 5 bytes are read from each of 5 rows
19 ; int ref_pixels_per_line,        byte stride between ref rows
20 ; unsigned char *src_ptr,         block subtracted from the filtered result; 4 bytes from each of 4 rows
21 ; int src_pixels_per_line,        byte stride between src rows
22 ; unsigned short *HFilter,        8 words: tap for pixel x in [0..3], tap for pixel x+1 in [4..7]
23 ; unsigned short *VFilter,        8 words: tap for the previous row in [0..3], tap for the current row in [4..7]
24 ; int *sum,                       out: sum of (filtered ref - src) over the 16 pixels
25 ; unsigned int *sumsquared        out: sum of (filtered ref - src)^2 over the 16 pixels
26 ;)
27 global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
28 sym(vp8_filter_block2d_bil4x4_var_mmx):
29 push rbp
30 mov rbp, rsp
31 SHADOW_ARGS_TO_STACK 8 ; macro from x86_abi_support.asm (not visible here); 8 presumably = argument count
32 GET_GOT rbx ; macro from x86_abi_support.asm; presumably prepares the base that GLOBAL() relies on
33 push rsi
34 push rdi
35 sub rsp, 16 ; reserve 16 bytes of stack; released again in the epilog
36 ; end prolog
37
38
39 pxor mm6, mm6 ; mm6 = 0: four 16-bit running sums of the differences
40 pxor mm7, mm7 ; mm7 = 0: two 32-bit running sums of the squared differences
41
42 mov rax, arg(4) ; rax = HFilter
43 mov rdx, arg(5) ; rdx = VFilter
44
45 mov rsi, arg(0) ; rsi = ref_ptr (current ref row)
46 mov rdi, arg(2) ; rdi = src_ptr (current src row)
47
48 mov rcx, 4 ; rcx = number of output rows still to produce
49 pxor mm0, mm0 ; mm0 = 0, used to zero-extend bytes to words
50
51 movd mm1, [rsi] ; ref row 0, pixels x..x+3
52 movd mm3, [rsi+1] ; ref row 0, pixels x+1..x+4
53
54 punpcklbw mm1, mm0 ; bytes -> words
55 pmullw mm1, [rax] ; * HFilter[0..3]
56
57 punpcklbw mm3, mm0 ; bytes -> words
58 pmullw mm3, [rax+8] ; * HFilter[4..7]
59
60 paddw mm1, mm3 ; add the two horizontal tap products
61 paddw mm1, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
62
63 psraw mm1, mmx_filter_shift ; >> 7
64 movq mm5, mm1 ; mm5 = horizontally filtered row 0, carried into the loop as the "previous row"
65
66 %if ABI_IS_32BIT
67 add rsi, dword ptr arg(1) ; advance ref by ref_pixels_per_line
68 %else
69 movsxd r8, dword ptr arg(1) ; r8 = sign-extended ref_pixels_per_line
70 add rsi, r8
71 %endif
72
73 .filter_block2d_bil4x4_var_mmx_loop:
74 ; each pass: filter the next ref row horizontally, blend it vertically with the previous row, diff against one src row
75 movd mm1, [rsi] ; current ref row, pixels x..x+3
76 movd mm3, [rsi+1] ; current ref row, pixels x+1..x+4
77
78 punpcklbw mm1, mm0 ; bytes -> words
79 pmullw mm1, [rax] ; * HFilter[0..3]
80
81 punpcklbw mm3, mm0 ; bytes -> words
82 pmullw mm3, [rax+8] ; * HFilter[4..7]
83
84 paddw mm1, mm3 ; add the two horizontal tap products
85 paddw mm1, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
86
87 psraw mm1, mmx_filter_shift ; >> 7: mm1 = horizontally filtered current row
88 movq mm3, mm5 ; mm3 = horizontally filtered previous row
89
90 movq mm5, mm1 ; keep the current row for the next pass (before mm1 is overwritten)
91 pmullw mm3, [rdx] ; previous row * VFilter[0..3]
92
93 pmullw mm1, [rdx+8] ; current row * VFilter[4..7]
94 paddw mm1, mm3 ; add the two vertical tap products
95
96
97 paddw mm1, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
98 psraw mm1, mmx_filter_shift ; >> 7: mm1 = fully filtered row
99
100 movd mm3, [rdi] ; 4 src pixels
101 punpcklbw mm3, mm0 ; bytes -> words
102
103 psubw mm1, mm3 ; diff = filtered ref - src
104 paddw mm6, mm1 ; accumulate the differences per 16-bit lane
105
106 pmaddwd mm1, mm1 ; square each diff, adjacent pairs summed into 2 dwords
107 paddd mm7, mm1 ; accumulate the squared differences
108
109 %if ABI_IS_32BIT
110 add rsi, dword ptr arg(1) ; advance ref by ref_pixels_per_line
111 add rdi, dword ptr arg(3) ; advance src by src_pixels_per_line
112 %else
113 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
114 movsxd r9, dword ptr arg(3) ;src_pixels_per_line
115 add rsi, r8
116 add rdi, r9
117 %endif
118 sub rcx, 1 ; one row done
119 jnz .filter_block2d_bil4x4_var_mmx_loop ; repeat until all 4 rows are processed
120
121 ; horizontal reduction of the accumulators
122 pxor mm3, mm3 ;
123 pxor mm2, mm2 ;
124
125 punpcklwd mm2, mm6 ; low two lane sums placed in the upper word of each dword
126 punpckhwd mm3, mm6 ; high two lane sums placed in the upper word of each dword
127
128 paddd mm2, mm3 ; pairwise add -> two partial sums (still << 16)
129 movq mm6, mm2 ;
130
131 psrlq mm6, 32 ; bring the upper partial sum down
132 paddd mm2, mm6 ; low dword of mm2 = total sum << 16
133
134 psrad mm2, 16 ; arithmetic shift back down, which sign-extends the 16-bit total
135 movq mm4, mm7 ;
136
137 psrlq mm4, 32 ; upper squared-sum dword moved down
138 paddd mm4, mm7 ; low dword of mm4 = total of the squared differences
139
140 mov rdi, arg(6) ;sum
141 mov rsi, arg(7) ;sumsquared
142
143 movd dword ptr [rdi], mm2 ; *sum = total difference
144 movd dword ptr [rsi], mm4 ; *sumsquared = total squared difference
145
146
147
148 ; begin epilog
149 add rsp, 16 ; release the 16 bytes reserved in the prolog
150 pop rdi
151 pop rsi
152 RESTORE_GOT
153 UNSHADOW_ARGS
154 pop rbp
155 ret
156
157 ; Two-pass (horizontal, then vertical) 2-tap filter of an 8-pixel-wide, Height-row block at ref_ptr,
158 ; followed by the sum of (filtered ref - src), written to *sum,
159 ; and the sum of (filtered ref - src)^2, written to *sumsquared.
160 ;void vp8_filter_block2d_bil_var_mmx
161 ;(
162 ; unsigned char *ref_ptr,         block that gets filtered; 9 bytes are read from each of Height+1 rows
163 ; int ref_pixels_per_line,        byte stride between ref rows
164 ; unsigned char *src_ptr,         block subtracted from the filtered result; 8 bytes from each of Height rows
165 ; int src_pixels_per_line,        byte stride between src rows
166 ; unsigned int Height,            number of output rows; the loop tests its counter only after a pass, so this must be >= 1
167 ; unsigned short *HFilter,        8 words: tap for pixel x in [0..3], tap for pixel x+1 in [4..7]
168 ; unsigned short *VFilter,        8 words: tap for the previous row in [0..3], tap for the current row in [4..7]
169 ; int *sum,                       out: sum of the differences (accumulated in 16-bit lanes before the final reduction)
170 ; unsigned int *sumsquared        out: sum of the squared differences
171 ;)
172 global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
173 sym(vp8_filter_block2d_bil_var_mmx):
174 push rbp
175 mov rbp, rsp
176 SHADOW_ARGS_TO_STACK 9 ; macro from x86_abi_support.asm (not visible here); 9 presumably = argument count
177 GET_GOT rbx ; macro from x86_abi_support.asm; presumably prepares the base that GLOBAL() relies on
178 push rsi
179 push rdi
180 sub rsp, 16 ; reserve 16 bytes of stack; released again in the epilog
181 ; end prolog
182
183 pxor mm6, mm6 ; mm6 = 0: four 16-bit running sums of the differences
184 pxor mm7, mm7 ; mm7 = 0: two 32-bit running sums of the squared differences
185 mov rax, arg(5) ; rax = HFilter
186
187 mov rdx, arg(6) ; rdx = VFilter
188 mov rsi, arg(0) ; rsi = ref_ptr (current ref row)
189
190 mov rdi, arg(2) ; rdi = src_ptr (current src row)
191 movsxd rcx, dword ptr arg(4) ; rcx = Height, the row counter
192
193 pxor mm0, mm0 ; mm0 = 0, used to zero-extend bytes to words
194 movq mm1, [rsi] ; ref row 0, pixels x..x+7
195
196 movq mm3, [rsi+1] ; ref row 0, pixels x+1..x+8
197 movq mm2, mm1 ; copy for the high 4 pixels
198
199 movq mm4, mm3 ; copy for the high 4 pixels
200 punpcklbw mm1, mm0 ; low 4 bytes -> words
201
202 punpckhbw mm2, mm0 ; high 4 bytes -> words
203 pmullw mm1, [rax] ; low half * HFilter[0..3]
204
205 pmullw mm2, [rax] ; high half * HFilter[0..3]
206 punpcklbw mm3, mm0 ; low 4 bytes -> words
207
208 punpckhbw mm4, mm0 ; high 4 bytes -> words
209 pmullw mm3, [rax+8] ; low half * HFilter[4..7]
210
211 pmullw mm4, [rax+8] ; high half * HFilter[4..7]
212 paddw mm1, mm3 ; low half: add the two tap products
213
214 paddw mm2, mm4 ; high half: add the two tap products
215 paddw mm1, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
216
217 psraw mm1, mmx_filter_shift ; >> 7
218 paddw mm2, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
219
220 psraw mm2, mmx_filter_shift ; >> 7
221 movq mm5, mm1
222
223 packuswb mm5, mm2 ; mm5 = horizontally filtered row 0 packed to 8 unsigned-saturated bytes, carried into the loop
224 %if ABI_IS_32BIT
225 add rsi, dword ptr arg(1) ;ref_pixels_per_line
226 %else
227 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
228 add rsi, r8
229 %endif
230
231 .filter_block2d_bil_var_mmx_loop:
232 ; each pass: filter the next ref row horizontally, blend it vertically with the previous row, diff against one src row
233 movq mm1, [rsi] ; current ref row, pixels x..x+7
234 movq mm3, [rsi+1] ; current ref row, pixels x+1..x+8
235
236 movq mm2, mm1 ; copy for the high 4 pixels
237 movq mm4, mm3 ; copy for the high 4 pixels
238
239 punpcklbw mm1, mm0 ; low 4 bytes -> words
240 punpckhbw mm2, mm0 ; high 4 bytes -> words
241
242 pmullw mm1, [rax] ; low half * HFilter[0..3]
243 pmullw mm2, [rax] ; high half * HFilter[0..3]
244
245 punpcklbw mm3, mm0 ; low 4 bytes -> words
246 punpckhbw mm4, mm0 ; high 4 bytes -> words
247
248 pmullw mm3, [rax+8] ; low half * HFilter[4..7]
249 pmullw mm4, [rax+8] ; high half * HFilter[4..7]
250
251 paddw mm1, mm3 ; low half: add the two tap products
252 paddw mm2, mm4 ; high half: add the two tap products
253
254 paddw mm1, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
255 psraw mm1, mmx_filter_shift ; >> 7: low half of the horizontally filtered current row
256
257 paddw mm2, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
258 psraw mm2, mmx_filter_shift ; >> 7: high half of the horizontally filtered current row
259
260 movq mm3, mm5 ; previous filtered row (packed bytes)
261 movq mm4, mm5 ; second copy for the high half
262
263 punpcklbw mm3, mm0 ; previous row, low 4 bytes -> words
264 punpckhbw mm4, mm0 ; previous row, high 4 bytes -> words
265
266 movq mm5, mm1 ; save the current row for the next pass (before mm1/mm2 are overwritten)...
267 packuswb mm5, mm2 ; ...packed back to 8 bytes
268
269 pmullw mm3, [rdx] ; previous row low half * VFilter[0..3]
270 pmullw mm4, [rdx] ; previous row high half * VFilter[0..3]
271
272 pmullw mm1, [rdx+8] ; current row low half * VFilter[4..7]
273 pmullw mm2, [rdx+8] ; current row high half * VFilter[4..7]
274
275 paddw mm1, mm3 ; low half: add the two vertical tap products
276 paddw mm2, mm4 ; high half: add the two vertical tap products
277
278 paddw mm1, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
279 paddw mm2, [GLOBAL(mmx_bi_rd)] ; + 64 for rounding
280
281 psraw mm1, mmx_filter_shift ; >> 7: fully filtered low half
282 psraw mm2, mmx_filter_shift ; >> 7: fully filtered high half
283
284 movq mm3, [rdi] ; 8 src pixels
285 movq mm4, mm3 ; copy for the high 4 pixels
286
287 punpcklbw mm3, mm0 ; src low 4 bytes -> words
288 punpckhbw mm4, mm0 ; src high 4 bytes -> words
289
290 psubw mm1, mm3 ; low half diff = filtered ref - src
291 psubw mm2, mm4 ; high half diff = filtered ref - src
292
293 paddw mm6, mm1 ; accumulate the low-half differences
294 pmaddwd mm1, mm1 ; square each diff, adjacent pairs summed into 2 dwords
295
296 paddw mm6, mm2 ; accumulate the high-half differences
297 pmaddwd mm2, mm2 ; square each diff, adjacent pairs summed into 2 dwords
298
299 paddd mm7, mm1 ; accumulate the squared differences (low half)
300 paddd mm7, mm2 ; accumulate the squared differences (high half)
301
302 %if ABI_IS_32BIT
303 add rsi, dword ptr arg(1) ; advance ref by ref_pixels_per_line
304 add rdi, dword ptr arg(3) ; advance src by src_pixels_per_line
305 %else
306 movsxd r8, dword ptr arg(1) ; r8 = sign-extended ref_pixels_per_line
307 movsxd r9, dword ptr arg(3) ; r9 = sign-extended src_pixels_per_line
308 add rsi, r8
309 add rdi, r9
310 %endif
311 sub rcx, 1 ; one row done
312 jnz .filter_block2d_bil_var_mmx_loop ; repeat until the row counter reaches zero
313
314 ; horizontal reduction of the accumulators
315 pxor mm3, mm3 ;
316 pxor mm2, mm2 ;
317
318 punpcklwd mm2, mm6 ; low two lane sums placed in the upper word of each dword
319 punpckhwd mm3, mm6 ; high two lane sums placed in the upper word of each dword
320
321 paddd mm2, mm3 ; pairwise add -> two partial sums (still << 16)
322 movq mm6, mm2 ;
323
324 psrlq mm6, 32 ; bring the upper partial sum down
325 paddd mm2, mm6 ; low dword of mm2 = total sum << 16
326
327 psrad mm2, 16 ; arithmetic shift back down, which sign-extends the 16-bit total
328 movq mm4, mm7 ;
329
330 psrlq mm4, 32 ; upper squared-sum dword moved down
331 paddd mm4, mm7 ; low dword of mm4 = total of the squared differences
332
333 mov rdi, arg(7) ;sum
334 mov rsi, arg(8) ;sumsquared
335
336 movd dword ptr [rdi], mm2 ; *sum = total difference
337 movd dword ptr [rsi], mm4 ; *sumsquared = total squared difference
338
339 ; begin epilog
340 add rsp, 16 ; release the 16 bytes reserved in the prolog
341 pop rdi
342 pop rsi
343 RESTORE_GOT
344 UNSHADOW_ARGS
345 pop rbp
346 ret
347
348
349 SECTION_RODATA
350 ;short mmx_bi_rd[4] = { 64, 64, 64, 64};  rounding term (half of 1 << mmx_filter_shift) added to each word lane before the shift
351 align 16
352 mmx_bi_rd:
353 times 4 dw 64
OLDNEW
« no previous file with comments | « libvpx_srcs_x86_intrinsics.gypi ('k') | source/libvpx/vp8/common/x86/variance_mmx.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698