Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last nights LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
13
; PROCESS_16X2X8 first
;
; Accumulates the SADs of two consecutive 16-byte-wide rows against eight
; horizontally adjacent reference positions (offsets 0..7).
;
; Parameter:
;   %1 - non-zero: the first row's result is written straight into xmm1
;        (accumulator initialisation); zero: it is added to xmm1.
; Implicit inputs (set up by the callers in this file):
;   rsi = source row pointer, rax = source stride
;   rdi = reference row pointer, rdx = reference stride
; Output:
;   xmm1 = eight 16-bit running SAD sums, lane i for reference offset i.
;   rsi and rdi are advanced by two rows.
; Clobbers: xmm0, xmm2, xmm3, xmm4, xmm5.
; Notes:
;   - The source row is loaded with movdqa, so [rsi] and [rsi + rax] must be
;     16-byte aligned.
;   - 24 bytes are read from each reference row (offsets 0..23).
14 %macro PROCESS_16X2X8 1
15 %if %1
16 movdqa xmm0, XMMWORD PTR [rsi] ; xmm0 = source bytes 0..15
17 movq xmm1, MMWORD PTR [rdi] ; ref bytes 0..7
18 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8..15
19 movq xmm2, MMWORD PTR [rdi+16] ; ref bytes 16..23
20 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0..15
21 punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8..23
22
23 movdqa xmm2, xmm1 ; mpsadbw overwrites its destination, keep a copy
24 mpsadbw xmm1, xmm0, 0x0 ; src bytes 0..3 vs ref windows starting at 0..7
25 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4..7 vs ref windows starting at 4..11
26
27 psrldq xmm0, 8 ; bring source bytes 8..15 into the low half
28
29 movdqa xmm4, xmm3
30 mpsadbw xmm3, xmm0, 0x0 ; src bytes 8..11 vs ref windows starting at 8..15
31 mpsadbw xmm4, xmm0, 0x5 ; src bytes 12..15 vs ref windows starting at 12..19
32
; Sum the four 4-byte partial SADs into the accumulator (first row: no prior sum).
33 paddw xmm1, xmm2
34 paddw xmm1, xmm3
35 paddw xmm1, xmm4
36 %else
; Same computation as above, but xmm5 holds the row result so that the
; running total in xmm1 is preserved.
37 movdqa xmm0, XMMWORD PTR [rsi] ; xmm0 = source bytes 0..15
38 movq xmm5, MMWORD PTR [rdi]
39 movq xmm3, MMWORD PTR [rdi+8]
40 movq xmm2, MMWORD PTR [rdi+16]
41 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0..15
42 punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8..23
43
44 movdqa xmm2, xmm5
45 mpsadbw xmm5, xmm0, 0x0
46 mpsadbw xmm2, xmm0, 0x5
47
48 psrldq xmm0, 8 ; bring source bytes 8..15 into the low half
49
50 movdqa xmm4, xmm3
51 mpsadbw xmm3, xmm0, 0x0
52 mpsadbw xmm4, xmm0, 0x5
53
54 paddw xmm5, xmm2
55 paddw xmm5, xmm3
56 paddw xmm5, xmm4
57
58 paddw xmm1, xmm5 ; fold this row into the running total
59 %endif
; Second row of the pair: one stride further in both buffers.
60 movdqa xmm0, XMMWORD PTR [rsi + rax]
61 movq xmm5, MMWORD PTR [rdi+ rdx]
62 movq xmm3, MMWORD PTR [rdi+ rdx+8]
63 movq xmm2, MMWORD PTR [rdi+ rdx+16]
64 punpcklqdq xmm5, xmm3
65 punpcklqdq xmm3, xmm2
66
; Advance both pointers by two rows for the next invocation.
67 lea rsi, [rsi+rax*2]
68 lea rdi, [rdi+rdx*2]
69
70 movdqa xmm2, xmm5
71 mpsadbw xmm5, xmm0, 0x0
72 mpsadbw xmm2, xmm0, 0x5
73
74 psrldq xmm0, 8
75 movdqa xmm4, xmm3
76 mpsadbw xmm3, xmm0, 0x0
77 mpsadbw xmm4, xmm0, 0x5
78
79 paddw xmm5, xmm2
80 paddw xmm5, xmm3
81 paddw xmm5, xmm4
82
83 paddw xmm1, xmm5 ; fold the second row into the running total
84 %endmacro
85
; PROCESS_8X2X8 first
;
; Accumulates the SADs of two consecutive 8-byte-wide rows against eight
; horizontally adjacent reference positions (offsets 0..7).
;
; Parameter:
;   %1 - non-zero: the first row's result initialises xmm1; zero: it is
;        added to the existing total in xmm1.
; Implicit inputs (set up by the callers in this file):
;   rsi = source row pointer, rax = source stride
;   rdi = reference row pointer, rdx = reference stride
; Output:
;   xmm1 = eight 16-bit running SAD sums, lane i for reference offset i.
;   rsi and rdi are advanced by two rows.
; Clobbers: xmm0, xmm2, xmm3, xmm5.
; Notes: 8 source bytes and 16 reference bytes are read per row, all with
;        movq (no alignment requirement).
86 %macro PROCESS_8X2X8 1
87 %if %1
88 movq xmm0, MMWORD PTR [rsi] ; xmm0 = source bytes 0..7
89 movq xmm1, MMWORD PTR [rdi]
90 movq xmm3, MMWORD PTR [rdi+8]
91 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0..15
92
93 movdqa xmm2, xmm1 ; mpsadbw overwrites its destination, keep a copy
94 mpsadbw xmm1, xmm0, 0x0 ; src bytes 0..3 vs ref windows starting at 0..7
95 mpsadbw xmm2, xmm0, 0x5 ; src bytes 4..7 vs ref windows starting at 4..11
96 paddw xmm1, xmm2 ; first row: result becomes the accumulator
97 %else
98 movq xmm0, MMWORD PTR [rsi] ; xmm0 = source bytes 0..7
99 movq xmm5, MMWORD PTR [rdi]
100 movq xmm3, MMWORD PTR [rdi+8]
101 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0..15
102
103 movdqa xmm2, xmm5
104 mpsadbw xmm5, xmm0, 0x0
105 mpsadbw xmm2, xmm0, 0x5
106 paddw xmm5, xmm2
107
108 paddw xmm1, xmm5 ; fold this row into the running total
109 %endif
; Second row of the pair: one stride further in both buffers.
110 movq xmm0, MMWORD PTR [rsi + rax]
111 movq xmm5, MMWORD PTR [rdi+ rdx]
112 movq xmm3, MMWORD PTR [rdi+ rdx+8]
113 punpcklqdq xmm5, xmm3
114
; Advance both pointers by two rows for the next invocation.
115 lea rsi, [rsi+rax*2]
116 lea rdi, [rdi+rdx*2]
117
118 movdqa xmm2, xmm5
119 mpsadbw xmm5, xmm0, 0x0
120 mpsadbw xmm2, xmm0, 0x5
121 paddw xmm5, xmm2
122
123 paddw xmm1, xmm5 ; fold the second row into the running total
124 %endmacro
125
; PROCESS_4X2X8 first
;
; Accumulates the SADs of two consecutive 4-byte-wide rows against eight
; horizontally adjacent reference positions (offsets 0..7).
;
; Parameter:
;   %1 - non-zero: the first row's result initialises xmm1; zero: it is
;        added to the existing total in xmm1.
; Implicit inputs (set up by the callers in this file):
;   rsi = source row pointer, rax = source stride
;   rdi = reference row pointer, rdx = reference stride
; Output:
;   xmm1 = eight 16-bit running SAD sums, lane i for reference offset i.
;   rsi and rdi are advanced by two rows.
; Clobbers: xmm0, xmm3, xmm5.
; Notes: 4 source bytes (movd) and 16 reference bytes (2 x movq) are read
;        per row; a single mpsadbw covers the whole 4-byte source row.
126 %macro PROCESS_4X2X8 1
127 %if %1
128 movd xmm0, [rsi] ; xmm0 = source bytes 0..3
129 movq xmm1, MMWORD PTR [rdi]
130 movq xmm3, MMWORD PTR [rdi+8]
131 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0..15
132
133 mpsadbw xmm1, xmm0, 0x0 ; first row: result becomes the accumulator
134 %else
135 movd xmm0, [rsi] ; xmm0 = source bytes 0..3
136 movq xmm5, MMWORD PTR [rdi]
137 movq xmm3, MMWORD PTR [rdi+8]
138 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0..15
139
140 mpsadbw xmm5, xmm0, 0x0 ; src bytes 0..3 vs ref windows starting at 0..7
141
142 paddw xmm1, xmm5 ; fold this row into the running total
143 %endif
; Second row of the pair: one stride further in both buffers.
144 movd xmm0, [rsi + rax]
145 movq xmm5, MMWORD PTR [rdi+ rdx]
146 movq xmm3, MMWORD PTR [rdi+ rdx+8]
147 punpcklqdq xmm5, xmm3
148
; Advance both pointers by two rows for the next invocation.
149 lea rsi, [rsi+rax*2]
150 lea rdi, [rdi+rdx*2]
151
152 mpsadbw xmm5, xmm0, 0x0
153
154 paddw xmm1, xmm5 ; fold the second row into the running total
155 %endmacro
156
; WRITE_AS_INTS
;
; Widens the eight 16-bit sums in xmm1 to eight 32-bit values (zero
; extension) and stores them to the buffer passed as argument 4.
;
; Input:    xmm1 = eight 16-bit sums.
; Output:   32 bytes written at arg(4): lanes 0..3 at +0, lanes 4..7 at +16.
; Clobbers: rdi, xmm0, xmm1, xmm2.
; Notes:    both stores use movdqa, so the destination must be 16-byte
;           aligned.
157 %macro WRITE_AS_INTS 0
158 mov rdi, arg(4) ;Results
159 pxor xmm0, xmm0 ; zero register used as the high halves when widening
160 movdqa xmm2, xmm1 ; copy so low and high lanes can be unpacked separately
161 punpcklwd xmm1, xmm0 ; words 0..3 -> dwords
162 punpckhwd xmm2, xmm0 ; words 4..7 -> dwords
163
164 movdqa [rdi], xmm1
165 movdqa [rdi + 16], xmm2
166 %endmacro
167
168 ;void vp9_sad16x16x8_sse4(
169 ; const unsigned char *src_ptr,
170 ; int src_stride,
171 ; const unsigned char *ref_ptr,
172 ; int ref_stride,
173 ; unsigned short *sad_array);
;
; Computes the SAD of a 16-wide, 16-row source block against the reference
; at eight consecutive horizontal offsets (ref_ptr + 0 .. ref_ptr + 7) and
; writes the eight results to sad_array.
;
; Register roles: rsi = src_ptr, rdi = ref_ptr, rax = src_stride,
; rdx = ref_stride (both strides sign-extended from 32 bits),
; xmm1 = running sums.
; Requirements visible from the code: each source row is loaded with movdqa
; (16-byte aligned src_ptr and stride), and the results are stored with
; movdqa (16-byte aligned sad_array).
; NOTE(review): WRITE_AS_INTS stores eight 32-bit values (32 bytes), so the
; "unsigned short" element type above looks stale - confirm against the C
; declaration.
174 global sym(vp9_sad16x16x8_sse4) PRIVATE
175 sym(vp9_sad16x16x8_sse4):
176 push rbp
177 mov rbp, rsp
; SHADOW_ARGS_TO_STACK is defined in the included x86_abi_support.asm (not
; visible here); presumably it makes the 5 arguments reachable via arg(n).
178 SHADOW_ARGS_TO_STACK 5
179 push rsi
180 push rdi
181 ; end prolog
182
183 mov rsi, arg(0) ;src_ptr
184 mov rdi, arg(2) ;ref_ptr
185
186 movsxd rax, dword ptr arg(1) ;src_stride
187 movsxd rdx, dword ptr arg(3) ;ref_stride
188
; 8 invocations x 2 rows each = 16 rows; only the first one initialises xmm1.
189 PROCESS_16X2X8 1
190 PROCESS_16X2X8 0
191 PROCESS_16X2X8 0
192 PROCESS_16X2X8 0
193 PROCESS_16X2X8 0
194 PROCESS_16X2X8 0
195 PROCESS_16X2X8 0
196 PROCESS_16X2X8 0
197
198 WRITE_AS_INTS
199
200 ; begin epilog
201 pop rdi
202 pop rsi
203 UNSHADOW_ARGS
204 pop rbp
205 ret
206
207
208 ;void vp9_sad16x8x8_sse4(
209 ; const unsigned char *src_ptr,
210 ; int src_stride,
211 ; const unsigned char *ref_ptr,
212 ; int ref_stride,
213 ; unsigned short *sad_array
214 ;);
;
; Computes the SAD of a 16-wide, 8-row source block against the reference
; at eight consecutive horizontal offsets (ref_ptr + 0 .. ref_ptr + 7) and
; writes the eight results to sad_array.
;
; Register roles: rsi = src_ptr, rdi = ref_ptr, rax = src_stride,
; rdx = ref_stride (both strides sign-extended from 32 bits),
; xmm1 = running sums.
; Requirements visible from the code: each source row is loaded with movdqa
; (16-byte aligned src_ptr and stride), and the results are stored with
; movdqa (16-byte aligned sad_array).
; NOTE(review): WRITE_AS_INTS stores eight 32-bit values (32 bytes), so the
; "unsigned short" element type above looks stale - confirm against the C
; declaration.
215 global sym(vp9_sad16x8x8_sse4) PRIVATE
216 sym(vp9_sad16x8x8_sse4):
217 push rbp
218 mov rbp, rsp
; SHADOW_ARGS_TO_STACK is defined in the included x86_abi_support.asm (not
; visible here); presumably it makes the 5 arguments reachable via arg(n).
219 SHADOW_ARGS_TO_STACK 5
220 push rsi
221 push rdi
222 ; end prolog
223
224 mov rsi, arg(0) ;src_ptr
225 mov rdi, arg(2) ;ref_ptr
226
227 movsxd rax, dword ptr arg(1) ;src_stride
228 movsxd rdx, dword ptr arg(3) ;ref_stride
229
; 4 invocations x 2 rows each = 8 rows; only the first one initialises xmm1.
230 PROCESS_16X2X8 1
231 PROCESS_16X2X8 0
232 PROCESS_16X2X8 0
233 PROCESS_16X2X8 0
234
235 WRITE_AS_INTS
236
237 ; begin epilog
238 pop rdi
239 pop rsi
240 UNSHADOW_ARGS
241 pop rbp
242 ret
243
244
245 ;void vp9_sad8x8x8_sse4(
246 ; const unsigned char *src_ptr,
247 ; int src_stride,
248 ; const unsigned char *ref_ptr,
249 ; int ref_stride,
250 ; unsigned short *sad_array
251 ;);
;
; Computes the SAD of an 8-wide, 8-row source block against the reference
; at eight consecutive horizontal offsets (ref_ptr + 0 .. ref_ptr + 7) and
; writes the eight results to sad_array.
;
; Register roles: rsi = src_ptr, rdi = ref_ptr, rax = src_stride,
; rdx = ref_stride (both strides sign-extended from 32 bits),
; xmm1 = running sums.
; Requirement visible from the code: the results are stored with movdqa,
; so sad_array must be 16-byte aligned.
; NOTE(review): WRITE_AS_INTS stores eight 32-bit values (32 bytes), so the
; "unsigned short" element type above looks stale - confirm against the C
; declaration.
252 global sym(vp9_sad8x8x8_sse4) PRIVATE
253 sym(vp9_sad8x8x8_sse4):
254 push rbp
255 mov rbp, rsp
; SHADOW_ARGS_TO_STACK is defined in the included x86_abi_support.asm (not
; visible here); presumably it makes the 5 arguments reachable via arg(n).
256 SHADOW_ARGS_TO_STACK 5
257 push rsi
258 push rdi
259 ; end prolog
260
261 mov rsi, arg(0) ;src_ptr
262 mov rdi, arg(2) ;ref_ptr
263
264 movsxd rax, dword ptr arg(1) ;src_stride
265 movsxd rdx, dword ptr arg(3) ;ref_stride
266
; 4 invocations x 2 rows each = 8 rows; only the first one initialises xmm1.
267 PROCESS_8X2X8 1
268 PROCESS_8X2X8 0
269 PROCESS_8X2X8 0
270 PROCESS_8X2X8 0
271
272 WRITE_AS_INTS
273
274 ; begin epilog
275 pop rdi
276 pop rsi
277 UNSHADOW_ARGS
278 pop rbp
279 ret
280
281
282 ;void vp9_sad8x16x8_sse4(
283 ; const unsigned char *src_ptr,
284 ; int src_stride,
285 ; const unsigned char *ref_ptr,
286 ; int ref_stride,
287 ; unsigned short *sad_array
288 ;);
;
; Computes the SAD of an 8-wide, 16-row source block against the reference
; at eight consecutive horizontal offsets (ref_ptr + 0 .. ref_ptr + 7) and
; writes the eight results to sad_array.
;
; Register roles: rsi = src_ptr, rdi = ref_ptr, rax = src_stride,
; rdx = ref_stride (both strides sign-extended from 32 bits),
; xmm1 = running sums.
; Requirement visible from the code: the results are stored with movdqa,
; so sad_array must be 16-byte aligned.
; NOTE(review): WRITE_AS_INTS stores eight 32-bit values (32 bytes), so the
; "unsigned short" element type above looks stale - confirm against the C
; declaration.
289 global sym(vp9_sad8x16x8_sse4) PRIVATE
290 sym(vp9_sad8x16x8_sse4):
291 push rbp
292 mov rbp, rsp
; SHADOW_ARGS_TO_STACK is defined in the included x86_abi_support.asm (not
; visible here); presumably it makes the 5 arguments reachable via arg(n).
293 SHADOW_ARGS_TO_STACK 5
294 push rsi
295 push rdi
296 ; end prolog
297
298 mov rsi, arg(0) ;src_ptr
299 mov rdi, arg(2) ;ref_ptr
300
301 movsxd rax, dword ptr arg(1) ;src_stride
302 movsxd rdx, dword ptr arg(3) ;ref_stride
303
; 8 invocations x 2 rows each = 16 rows; only the first one initialises xmm1.
304 PROCESS_8X2X8 1
305 PROCESS_8X2X8 0
306 PROCESS_8X2X8 0
307 PROCESS_8X2X8 0
308 PROCESS_8X2X8 0
309 PROCESS_8X2X8 0
310 PROCESS_8X2X8 0
311 PROCESS_8X2X8 0
312
313 WRITE_AS_INTS
314
315 ; begin epilog
316 pop rdi
317 pop rsi
318 UNSHADOW_ARGS
319 pop rbp
320 ret
321
322
323 ;void vp9_sad4x4x8_sse4(
324 ; const unsigned char *src_ptr,
325 ; int src_stride,
326 ; const unsigned char *ref_ptr,
327 ; int ref_stride,
328 ; unsigned short *sad_array
329 ;);
;
; Computes the SAD of a 4-wide, 4-row source block against the reference
; at eight consecutive horizontal offsets (ref_ptr + 0 .. ref_ptr + 7) and
; writes the eight results to sad_array.
;
; Register roles: rsi = src_ptr, rdi = ref_ptr, rax = src_stride,
; rdx = ref_stride (both strides sign-extended from 32 bits),
; xmm1 = running sums.
; Requirement visible from the code: the results are stored with movdqa,
; so sad_array must be 16-byte aligned.
; NOTE(review): WRITE_AS_INTS stores eight 32-bit values (32 bytes), so the
; "unsigned short" element type above looks stale - confirm against the C
; declaration.
330 global sym(vp9_sad4x4x8_sse4) PRIVATE
331 sym(vp9_sad4x4x8_sse4):
332 push rbp
333 mov rbp, rsp
; SHADOW_ARGS_TO_STACK is defined in the included x86_abi_support.asm (not
; visible here); presumably it makes the 5 arguments reachable via arg(n).
334 SHADOW_ARGS_TO_STACK 5
335 push rsi
336 push rdi
337 ; end prolog
338
339 mov rsi, arg(0) ;src_ptr
340 mov rdi, arg(2) ;ref_ptr
341
342 movsxd rax, dword ptr arg(1) ;src_stride
343 movsxd rdx, dword ptr arg(3) ;ref_stride
344
; 2 invocations x 2 rows each = 4 rows; only the first one initialises xmm1.
345 PROCESS_4X2X8 1
346 PROCESS_4X2X8 0
347
348 WRITE_AS_INTS
349
350 ; begin epilog
351 pop rdi
352 pop rsi
353 UNSHADOW_ARGS
354 pop rbp
355 ret
356
357
358
359
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm ('k') | source/libvpx/vp9/encoder/x86/vp9_sad_ssse3.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698