Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(455)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last nights LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11 %include "vpx_ports/x86_abi_support.asm"
12
;-----------------------------------------------------------------------
; STACK_FRAME_CREATE_X3
;
; Common prologue for the x3 SAD routines in this file. It binds the
; symbolic operand names used by the function bodies (src_ptr,
; src_stride, ref_ptr, ref_stride, end_ptr, ret_var, result_ptr,
; max_err, height) to the registers or stack slots where the active
; ABI places them.
;
;   32-bit : builds an rbp frame, saves rsi/rdi/rbx and loads the four
;            leading arguments from the stack into registers.
;   Win64  : saves xmm6/xmm7 through SAVE_XMM; the fifth argument is
;            read from the caller's stack area.
;   other  : no instructions are emitted, only the name bindings.
;
; result_ptr, max_err and height all alias the fifth argument.
;-----------------------------------------------------------------------
%macro STACK_FRAME_CREATE_X3 0
%if ABI_IS_32BIT
    %define     src_ptr         rsi
    %define     src_stride      rax
    %define     ref_ptr         rdi
    %define     ref_stride      rdx
    %define     end_ptr         rcx
    %define     ret_var         rbx
    %define     result_ptr      arg(4)
    %define     max_err         arg(4)
    %define     height          dword ptr arg(4)

    push        rbp
    mov         rbp,            rsp
    push        rsi
    push        rdi
    push        rbx

    ; Pull the stack arguments into registers, in argument order.
    mov         rsi,            arg(0)              ; src_ptr
    movsxd      rax,            dword ptr arg(1)    ; src_stride
    mov         rdi,            arg(2)              ; ref_ptr
    movsxd      rdx,            dword ptr arg(3)    ; ref_stride
%elif LIBVPX_YASM_WIN64
    SAVE_XMM 7, u
    %define     src_ptr         rcx
    %define     src_stride      rdx
    %define     ref_ptr         r8
    %define     ref_stride      r9
    %define     end_ptr         r10
    %define     ret_var         r11
    %define     result_ptr      [rsp+xmm_stack_space+8+4*8]
    %define     max_err         [rsp+xmm_stack_space+8+4*8]
    %define     height          dword ptr [rsp+xmm_stack_space+8+4*8]
%else
    %define     src_ptr         rdi
    %define     src_stride      rsi
    %define     ref_ptr         rdx
    %define     ref_stride      rcx
    %define     end_ptr         r9
    %define     ret_var         r10
    %define     result_ptr      r8
    %define     max_err         r8
    %define     height          r8
%endif

%endmacro
61
;-----------------------------------------------------------------------
; STACK_FRAME_DESTROY_X3
;
; Common epilogue matching STACK_FRAME_CREATE_X3. It first redefines
; every symbolic operand name as empty, so the bindings of the function
; that just ended are no longer in effect, then undoes whatever the
; prologue did for the active ABI and returns.
;
;   32-bit : pops rbx, rdi, rsi, rbp (reverse of the prologue pushes).
;   Win64  : restores the saved xmm registers through RESTORE_XMM.
;   other  : nothing to undo.
;-----------------------------------------------------------------------
%macro STACK_FRAME_DESTROY_X3 0
    %define     src_ptr
    %define     src_stride
    %define     ref_ptr
    %define     ref_stride
    %define     end_ptr
    %define     ret_var
    %define     result_ptr
    %define     max_err
    %define     height

%if ABI_IS_32BIT
    pop         rbx
    pop         rdi
    pop         rsi
    pop         rbp
%elif LIBVPX_YASM_WIN64
    RESTORE_XMM
%endif
    ret
%endmacro
85
;-----------------------------------------------------------------------
; PROCESS_16X2X3 mode, src, ref, src_stride, ref_stride
;
; Handles two 16-byte rows. Each source row is compared against three
; reference candidates starting at ref, ref+1 and ref+2, and the
; per-candidate byte-difference sums are kept in xmm5, xmm6 and xmm7.
;
;   param 1 (mode)   0 : first call  - accumulators are initialised,
;                        pointers advance by two rows
;                    1 : middle call - accumulators are added to,
;                        pointers advance by two rows
;                    any other value (2 in this file) : accumulators
;                        are added to, pointers are left unchanged
;   param 2 / 3      source / reference pointer registers
;   param 4 / 5      source / reference stride registers
;
; The source rows are read with movdqa, so they must be 16-byte
; aligned; the reference rows are read with lddqu (no alignment
; requirement).
; Scratch registers: xmm0-xmm3.
;-----------------------------------------------------------------------
%macro PROCESS_16X2X3 5
    movdqa      xmm0,       XMMWORD PTR [%2]
%if %1==0
    ; First row of the block: the SADs seed the accumulators directly.
    lddqu       xmm5,       XMMWORD PTR [%3]
    psadbw      xmm5,       xmm0

    lddqu       xmm6,       XMMWORD PTR [%3+1]
    psadbw      xmm6,       xmm0

    lddqu       xmm7,       XMMWORD PTR [%3+2]
    psadbw      xmm7,       xmm0
%else
    ; Later rows: compute into scratch, then add to the accumulators.
    lddqu       xmm1,       XMMWORD PTR [%3]
    psadbw      xmm1,       xmm0
    paddw       xmm5,       xmm1

    lddqu       xmm2,       XMMWORD PTR [%3+1]
    psadbw      xmm2,       xmm0
    paddw       xmm6,       xmm2

    lddqu       xmm3,       XMMWORD PTR [%3+2]
    psadbw      xmm3,       xmm0
    paddw       xmm7,       xmm3
%endif

    ; Second row: all loads are issued before the pointers move.
    movdqa      xmm0,       XMMWORD PTR [%2+%4]
    lddqu       xmm1,       XMMWORD PTR [%3+%5]
    lddqu       xmm2,       XMMWORD PTR [%3+%5+1]
    lddqu       xmm3,       XMMWORD PTR [%3+%5+2]

%if %1==0 || %1==1
    lea         %2,         [%2+%4*2]
    lea         %3,         [%3+%5*2]
%endif

    psadbw      xmm1,       xmm0
    paddw       xmm5,       xmm1

    psadbw      xmm2,       xmm0
    paddw       xmm6,       xmm2

    psadbw      xmm3,       xmm0
    paddw       xmm7,       xmm3
%endmacro
128
;-----------------------------------------------------------------------
; PROCESS_8X2X3 mode, src, ref, src_stride, ref_stride
;
; Handles two 8-byte rows with MMX registers. Each source row is
; compared against three reference candidates starting at ref, ref+1
; and ref+2, and the per-candidate byte-difference sums are kept in
; mm5, mm6 and mm7.
;
;   param 1 (mode)   0 : first call  - accumulators are initialised,
;                        pointers advance by two rows
;                    1 : middle call - accumulators are added to,
;                        pointers advance by two rows
;                    any other value (2 in this file) : accumulators
;                        are added to, pointers are left unchanged
;   param 2 / 3      source / reference pointer registers
;   param 4 / 5      source / reference stride registers
;
; Scratch registers: mm0-mm3.
;-----------------------------------------------------------------------
%macro PROCESS_8X2X3 5
    movq        mm0,        QWORD PTR [%2]
%if %1==0
    ; First row of the block: the SADs seed the accumulators directly.
    movq        mm5,        QWORD PTR [%3]
    psadbw      mm5,        mm0

    movq        mm6,        QWORD PTR [%3+1]
    psadbw      mm6,        mm0

    movq        mm7,        QWORD PTR [%3+2]
    psadbw      mm7,        mm0
%else
    ; Later rows: compute into scratch, then add to the accumulators.
    movq        mm1,        QWORD PTR [%3]
    psadbw      mm1,        mm0
    paddw       mm5,        mm1

    movq        mm2,        QWORD PTR [%3+1]
    psadbw      mm2,        mm0
    paddw       mm6,        mm2

    movq        mm3,        QWORD PTR [%3+2]
    psadbw      mm3,        mm0
    paddw       mm7,        mm3
%endif

    ; Second row: all loads are issued before the pointers move.
    movq        mm0,        QWORD PTR [%2+%4]
    movq        mm1,        QWORD PTR [%3+%5]
    movq        mm2,        QWORD PTR [%3+%5+1]
    movq        mm3,        QWORD PTR [%3+%5+2]

%if %1==0 || %1==1
    lea         %2,         [%2+%4*2]
    lea         %3,         [%3+%5*2]
%endif

    psadbw      mm1,        mm0
    paddw       mm5,        mm1

    psadbw      mm2,        mm0
    paddw       mm6,        mm2

    psadbw      mm3,        mm0
    paddw       mm7,        mm3
%endmacro
171
;-----------------------------------------------------------------------
; void vp9_sad16x16x3_sse3(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride,
;     int           *results)
;
; Sum of absolute differences of one 16x16 source block against three
; reference blocks located at ref_ptr, ref_ptr+1 and ref_ptr+2.
; The three sums are stored to results[0], results[1] and results[2].
;
; Source rows are loaded with movdqa inside PROCESS_16X2X3, so every
; source row has to be 16-byte aligned.
;-----------------------------------------------------------------------
global sym(vp9_sad16x16x3_sse3) PRIVATE
sym(vp9_sad16x16x3_sse3):

    STACK_FRAME_CREATE_X3

    ; 8 macro invocations x 2 rows each = 16 rows.
    PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
%rep 6
    PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
%endrep
    PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride

    ; psadbw keeps one partial sum per 64-bit half of the register;
    ; add the upper half onto the lower half and store 32 bits.

    ; candidate at ref_ptr
    movq        xmm0,       xmm5
    psrldq      xmm5,       8
    paddw       xmm0,       xmm5

    ; The pointer/stride registers are no longer needed, so rcx can be
    ; reused for the output pointer.
    mov         rcx,        result_ptr
    movd        [rcx],      xmm0

    ; candidate at ref_ptr+1
    movq        xmm0,       xmm6
    psrldq      xmm6,       8
    paddw       xmm0,       xmm6
    movd        [rcx+4],    xmm0

    ; candidate at ref_ptr+2
    movq        xmm0,       xmm7
    psrldq      xmm7,       8
    paddw       xmm0,       xmm7
    movd        [rcx+8],    xmm0

    STACK_FRAME_DESTROY_X3
213
;-----------------------------------------------------------------------
; void vp9_sad16x8x3_sse3(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride,
;     int           *results)
;
; Sum of absolute differences of one 16-wide, 8-row source block
; against three reference blocks located at ref_ptr, ref_ptr+1 and
; ref_ptr+2. The three sums are stored to results[0..2].
;
; Source rows are loaded with movdqa inside PROCESS_16X2X3, so every
; source row has to be 16-byte aligned.
;-----------------------------------------------------------------------
global sym(vp9_sad16x8x3_sse3) PRIVATE
sym(vp9_sad16x8x3_sse3):

    STACK_FRAME_CREATE_X3

    ; 4 macro invocations x 2 rows each = 8 rows.
    PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
    PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
    PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
    PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride

    ; Each accumulator holds one partial sum per 64-bit half; fold the
    ; upper half into the lower half before storing 32 bits.

    ; candidate at ref_ptr
    movq        xmm0,       xmm5
    psrldq      xmm5,       8
    paddw       xmm0,       xmm5

    ; rcx is free to hold the output pointer from here on.
    mov         rcx,        result_ptr
    movd        [rcx],      xmm0

    ; candidate at ref_ptr+1
    movq        xmm0,       xmm6
    psrldq      xmm6,       8
    paddw       xmm0,       xmm6
    movd        [rcx+4],    xmm0

    ; candidate at ref_ptr+2
    movq        xmm0,       xmm7
    psrldq      xmm7,       8
    paddw       xmm0,       xmm7
    movd        [rcx+8],    xmm0

    STACK_FRAME_DESTROY_X3
251
;-----------------------------------------------------------------------
; void vp9_sad8x16x3_sse3(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride,
;     int           *results)
;
; Sum of absolute differences of one 8-wide, 16-row source block
; against three reference blocks located at ref_ptr, ref_ptr+1 and
; ref_ptr+2. The three sums are stored to results[0..2].
;
; Uses MMX registers and does not execute emms before returning.
; NOTE(review): presumably the caller resets the x87/MMX state -
; confirm against the call sites.
;-----------------------------------------------------------------------
global sym(vp9_sad8x16x3_sse3) PRIVATE
sym(vp9_sad8x16x3_sse3):

    STACK_FRAME_CREATE_X3

    ; 8 macro invocations x 2 rows each = 16 rows.
    PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
%rep 6
    PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
%endrep
    PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride

    ; Pointer/stride registers are dead now; reuse rcx for the output.
    mov         rcx,        result_ptr

    ; Pack the first two sums side by side so that a single 64-bit
    ; store writes results[0] and results[1].
    punpckldq   mm5,        mm6
    movq        [rcx],      mm5

    movd        [rcx+8],    mm7             ; results[2]

    STACK_FRAME_DESTROY_X3
280
;-----------------------------------------------------------------------
; void vp9_sad8x8x3_sse3(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride,
;     int           *results)
;
; Sum of absolute differences of one 8x8 source block against three
; reference blocks located at ref_ptr, ref_ptr+1 and ref_ptr+2.
; The three sums are stored to results[0..2].
;
; Uses MMX registers and does not execute emms before returning.
; NOTE(review): presumably the caller resets the x87/MMX state -
; confirm against the call sites.
;-----------------------------------------------------------------------
global sym(vp9_sad8x8x3_sse3) PRIVATE
sym(vp9_sad8x8x3_sse3):

    STACK_FRAME_CREATE_X3

    ; 4 macro invocations x 2 rows each = 8 rows.
    PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
    PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
    PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
    PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride

    ; Pointer/stride registers are dead now; reuse rcx for the output.
    mov         rcx,        result_ptr

    movd        [rcx+8],    mm7             ; results[2]

    ; Pack the first two sums side by side so that a single 64-bit
    ; store writes results[0] and results[1].
    punpckldq   mm5,        mm6
    movq        [rcx],      mm5

    STACK_FRAME_DESTROY_X3
305
;-----------------------------------------------------------------------
; void vp9_sad4x4x3_sse3(
;     unsigned char *src_ptr,
;     int            src_stride,
;     unsigned char *ref_ptr,
;     int            ref_stride,
;     int           *results)
;
; Sum of absolute differences of one 4x4 source block against three
; reference blocks located at ref_ptr, ref_ptr+1 and ref_ptr+2.
; The three sums are stored to results[0..2].
;
; Two 4-byte rows are interleaved into one 8-byte MMX register so that
; a single psadbw covers two rows. Source and reference rows are
; interleaved the same way, so each source byte still meets its
; matching reference byte.
;
; Register roles:
;   mm0            interleaved source rows of the current row pair
;   mm1            running sum for the candidate at ref_ptr
;   mm4 -> mm3     sum for the candidate at ref_ptr+1
;   mm5 -> mm7     sum for the candidate at ref_ptr+2
;   mm2, mm3, mm6  short-lived scratch
;
; Uses MMX registers and does not execute emms before returning.
; NOTE(review): presumably the caller resets the x87/MMX state -
; confirm against the call sites.
;-----------------------------------------------------------------------
global sym(vp9_sad4x4x3_sse3) PRIVATE
sym(vp9_sad4x4x3_sse3):

    STACK_FRAME_CREATE_X3

    ; ---- rows 0 and 1 ------------------------------------------------
    movd        mm0,        DWORD PTR [src_ptr]
    movd        mm2,        DWORD PTR [src_ptr+src_stride]
    punpcklbw   mm0,        mm2

    ; candidate at ref_ptr
    movd        mm1,        DWORD PTR [ref_ptr]
    movd        mm3,        DWORD PTR [ref_ptr+ref_stride]
    punpcklbw   mm1,        mm3
    psadbw      mm1,        mm0

    ; candidate at ref_ptr+1
    movd        mm4,        DWORD PTR [ref_ptr+1]
    movd        mm2,        DWORD PTR [ref_ptr+ref_stride+1]
    punpcklbw   mm4,        mm2
    psadbw      mm4,        mm0

    ; candidate at ref_ptr+2
    movd        mm5,        DWORD PTR [ref_ptr+2]
    movd        mm3,        DWORD PTR [ref_ptr+ref_stride+2]
    punpcklbw   mm5,        mm3
    psadbw      mm5,        mm0

    ; Step both pointers down by two rows.
    lea         src_ptr,    [src_ptr+src_stride*2]
    lea         ref_ptr,    [ref_ptr+ref_stride*2]

    ; ---- rows 2 and 3 ------------------------------------------------
    movd        mm0,        DWORD PTR [src_ptr]
    movd        mm3,        DWORD PTR [src_ptr+src_stride]
    punpcklbw   mm0,        mm3

    ; candidate at ref_ptr: add onto the running sum in mm1
    movd        mm2,        DWORD PTR [ref_ptr]
    movd        mm6,        DWORD PTR [ref_ptr+ref_stride]
    punpcklbw   mm2,        mm6
    psadbw      mm2,        mm0
    paddw       mm1,        mm2

    ; candidate at ref_ptr+1: total ends up in mm3
    movd        mm3,        DWORD PTR [ref_ptr+1]
    movd        mm2,        DWORD PTR [ref_ptr+ref_stride+1]
    punpcklbw   mm3,        mm2
    psadbw      mm3,        mm0
    paddw       mm3,        mm4

    ; candidate at ref_ptr+2: total ends up in mm7
    movd        mm7,        DWORD PTR [ref_ptr+2]
    movd        mm6,        DWORD PTR [ref_ptr+ref_stride+2]
    punpcklbw   mm7,        mm6
    psadbw      mm7,        mm0
    paddw       mm7,        mm5

    ; ---- store the three sums ----------------------------------------
    mov         rcx,        result_ptr

    ; Pack the first two sums side by side so that a single 64-bit
    ; store writes results[0] and results[1].
    punpckldq   mm1,        mm3
    movq        [rcx],      mm1

    movd        [rcx+8],    mm7             ; results[2]

    STACK_FRAME_DESTROY_X3
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_sad_sse2.asm ('k') | source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698