Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: source/libvpx/vpx_dsp/x86/sad_sse3.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vpx_dsp/x86/sad_sse2.asm ('k') | source/libvpx/vpx_dsp/x86/sad_sse4.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 %include "vpx_ports/x86_abi_support.asm" 11 %include "vpx_ports/x86_abi_support.asm"
12 12
13 %macro STACK_FRAME_CREATE_X3 0 13 %macro STACK_FRAME_CREATE_X3 0
14 %if ABI_IS_32BIT 14 %if ABI_IS_32BIT
15 %define src_ptr rsi 15 %define src_ptr rsi
16 %define src_stride rax 16 %define src_stride rax
17 %define ref_ptr rdi 17 %define ref_ptr rdi
18 %define ref_stride rdx 18 %define ref_stride rdx
19 %define end_ptr rcx 19 %define end_ptr rcx
20 %define ret_var rbx 20 %define ret_var rbx
21 %define result_ptr arg(4) 21 %define result_ptr arg(4)
22 %define max_err arg(4)
23 %define height dword ptr arg(4) 22 %define height dword ptr arg(4)
24 push rbp 23 push rbp
25 mov rbp, rsp 24 mov rbp, rsp
26 push rsi 25 push rsi
27 push rdi 26 push rdi
28 push rbx 27 push rbx
29 28
30 mov rsi, arg(0) ; src_ptr 29 mov rsi, arg(0) ; src_ptr
31 mov rdi, arg(2) ; ref_ptr 30 mov rdi, arg(2) ; ref_ptr
32 31
33 movsxd rax, dword ptr arg(1) ; src_stride 32 movsxd rax, dword ptr arg(1) ; src_stride
34 movsxd rdx, dword ptr arg(3) ; ref_stride 33 movsxd rdx, dword ptr arg(3) ; ref_stride
35 %else 34 %else
36 %if LIBVPX_YASM_WIN64 35 %if LIBVPX_YASM_WIN64
37 SAVE_XMM 7, u 36 SAVE_XMM 7, u
38 %define src_ptr rcx 37 %define src_ptr rcx
39 %define src_stride rdx 38 %define src_stride rdx
40 %define ref_ptr r8 39 %define ref_ptr r8
41 %define ref_stride r9 40 %define ref_stride r9
42 %define end_ptr r10 41 %define end_ptr r10
43 %define ret_var r11 42 %define ret_var r11
44 %define result_ptr [rsp+xmm_stack_space+8+4*8] 43 %define result_ptr [rsp+xmm_stack_space+8+4*8]
45 %define max_err [rsp+xmm_stack_space+8+4*8]
46 %define height dword ptr [rsp+xmm_stack_space+8+4*8] 44 %define height dword ptr [rsp+xmm_stack_space+8+4*8]
47 %else 45 %else
48 %define src_ptr rdi 46 %define src_ptr rdi
49 %define src_stride rsi 47 %define src_stride rsi
50 %define ref_ptr rdx 48 %define ref_ptr rdx
51 %define ref_stride rcx 49 %define ref_stride rcx
52 %define end_ptr r9 50 %define end_ptr r9
53 %define ret_var r10 51 %define ret_var r10
54 %define result_ptr r8 52 %define result_ptr r8
55 %define max_err r8
56 %define height r8 53 %define height r8
57 %endif 54 %endif
58 %endif 55 %endif
59 56
60 %endmacro 57 %endmacro
61 58
62 %macro STACK_FRAME_DESTROY_X3 0 59 %macro STACK_FRAME_DESTROY_X3 0
63 %define src_ptr 60 %define src_ptr
64 %define src_stride 61 %define src_stride
65 %define ref_ptr 62 %define ref_ptr
66 %define ref_stride 63 %define ref_stride
67 %define end_ptr 64 %define end_ptr
68 %define ret_var 65 %define ret_var
69 %define result_ptr 66 %define result_ptr
70 %define max_err
71 %define height 67 %define height
72 68
73 %if ABI_IS_32BIT 69 %if ABI_IS_32BIT
74 pop rbx 70 pop rbx
75 pop rdi 71 pop rdi
76 pop rsi 72 pop rsi
77 pop rbp 73 pop rbp
78 %else 74 %else
79 %if LIBVPX_YASM_WIN64 75 %if LIBVPX_YASM_WIN64
80 RESTORE_XMM 76 RESTORE_XMM
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
162 158
163 psadbw mm1, mm0 159 psadbw mm1, mm0
164 psadbw mm2, mm0 160 psadbw mm2, mm0
165 psadbw mm3, mm0 161 psadbw mm3, mm0
166 162
167 paddw mm5, mm1 163 paddw mm5, mm1
168 paddw mm6, mm2 164 paddw mm6, mm2
169 paddw mm7, mm3 165 paddw mm7, mm3
170 %endmacro 166 %endmacro
171 167
172 ;void vp9_sad16x16x3_sse3( 168 ;void vpx_sad16x16x3_sse3(
173 ; unsigned char *src_ptr, 169 ; unsigned char *src_ptr,
174 ; int src_stride, 170 ; int src_stride,
175 ; unsigned char *ref_ptr, 171 ; unsigned char *ref_ptr,
176 ; int ref_stride, 172 ; int ref_stride,
177 ; int *results) 173 ; int *results)
178 global sym(vp9_sad16x16x3_sse3) PRIVATE 174 global sym(vpx_sad16x16x3_sse3) PRIVATE
179 sym(vp9_sad16x16x3_sse3): 175 sym(vpx_sad16x16x3_sse3):
180 176
181 STACK_FRAME_CREATE_X3 177 STACK_FRAME_CREATE_X3
182 178
183 PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride 179 PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
184 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 180 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
185 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 181 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
186 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 182 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
187 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 183 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
188 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 184 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
189 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 185 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
(...skipping 14 matching lines...) Expand all
204 movd [rcx+4], xmm0 200 movd [rcx+4], xmm0
205 ;- 201 ;-
206 movq xmm0, xmm7 202 movq xmm0, xmm7
207 psrldq xmm7, 8 203 psrldq xmm7, 8
208 204
209 paddw xmm0, xmm7 205 paddw xmm0, xmm7
210 movd [rcx+8], xmm0 206 movd [rcx+8], xmm0
211 207
212 STACK_FRAME_DESTROY_X3 208 STACK_FRAME_DESTROY_X3
213 209
214 ;void vp9_sad16x8x3_sse3( 210 ;void vpx_sad16x8x3_sse3(
215 ; unsigned char *src_ptr, 211 ; unsigned char *src_ptr,
216 ; int src_stride, 212 ; int src_stride,
217 ; unsigned char *ref_ptr, 213 ; unsigned char *ref_ptr,
218 ; int ref_stride, 214 ; int ref_stride,
219 ; int *results) 215 ; int *results)
220 global sym(vp9_sad16x8x3_sse3) PRIVATE 216 global sym(vpx_sad16x8x3_sse3) PRIVATE
221 sym(vp9_sad16x8x3_sse3): 217 sym(vpx_sad16x8x3_sse3):
222 218
223 STACK_FRAME_CREATE_X3 219 STACK_FRAME_CREATE_X3
224 220
225 PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride 221 PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
226 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 222 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
227 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 223 PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
228 PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride 224 PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
229 225
230 mov rcx, result_ptr 226 mov rcx, result_ptr
231 227
(...skipping 10 matching lines...) Expand all
242 movd [rcx+4], xmm0 238 movd [rcx+4], xmm0
243 ;- 239 ;-
244 movq xmm0, xmm7 240 movq xmm0, xmm7
245 psrldq xmm7, 8 241 psrldq xmm7, 8
246 242
247 paddw xmm0, xmm7 243 paddw xmm0, xmm7
248 movd [rcx+8], xmm0 244 movd [rcx+8], xmm0
249 245
250 STACK_FRAME_DESTROY_X3 246 STACK_FRAME_DESTROY_X3
251 247
252 ;void vp9_sad8x16x3_sse3( 248 ;void vpx_sad8x16x3_sse3(
253 ; unsigned char *src_ptr, 249 ; unsigned char *src_ptr,
254 ; int src_stride, 250 ; int src_stride,
255 ; unsigned char *ref_ptr, 251 ; unsigned char *ref_ptr,
256 ; int ref_stride, 252 ; int ref_stride,
257 ; int *results) 253 ; int *results)
258 global sym(vp9_sad8x16x3_sse3) PRIVATE 254 global sym(vpx_sad8x16x3_sse3) PRIVATE
259 sym(vp9_sad8x16x3_sse3): 255 sym(vpx_sad8x16x3_sse3):
260 256
261 STACK_FRAME_CREATE_X3 257 STACK_FRAME_CREATE_X3
262 258
263 PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride 259 PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
264 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 260 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
265 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 261 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
266 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 262 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
267 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 263 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
268 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 264 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
269 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 265 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
270 PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride 266 PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
271 267
272 mov rcx, result_ptr 268 mov rcx, result_ptr
273 269
274 punpckldq mm5, mm6 270 punpckldq mm5, mm6
275 271
276 movq [rcx], mm5 272 movq [rcx], mm5
277 movd [rcx+8], mm7 273 movd [rcx+8], mm7
278 274
279 STACK_FRAME_DESTROY_X3 275 STACK_FRAME_DESTROY_X3
280 276
281 ;void vp9_sad8x8x3_sse3( 277 ;void vpx_sad8x8x3_sse3(
282 ; unsigned char *src_ptr, 278 ; unsigned char *src_ptr,
283 ; int src_stride, 279 ; int src_stride,
284 ; unsigned char *ref_ptr, 280 ; unsigned char *ref_ptr,
285 ; int ref_stride, 281 ; int ref_stride,
286 ; int *results) 282 ; int *results)
287 global sym(vp9_sad8x8x3_sse3) PRIVATE 283 global sym(vpx_sad8x8x3_sse3) PRIVATE
288 sym(vp9_sad8x8x3_sse3): 284 sym(vpx_sad8x8x3_sse3):
289 285
290 STACK_FRAME_CREATE_X3 286 STACK_FRAME_CREATE_X3
291 287
292 PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride 288 PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
293 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 289 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
294 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 290 PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
295 PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride 291 PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
296 292
297 mov rcx, result_ptr 293 mov rcx, result_ptr
298 294
299 punpckldq mm5, mm6 295 punpckldq mm5, mm6
300 296
301 movq [rcx], mm5 297 movq [rcx], mm5
302 movd [rcx+8], mm7 298 movd [rcx+8], mm7
303 299
304 STACK_FRAME_DESTROY_X3 300 STACK_FRAME_DESTROY_X3
305 301
306 ;void vp9_sad4x4x3_sse3( 302 ;void vpx_sad4x4x3_sse3(
307 ; unsigned char *src_ptr, 303 ; unsigned char *src_ptr,
308 ; int src_stride, 304 ; int src_stride,
309 ; unsigned char *ref_ptr, 305 ; unsigned char *ref_ptr,
310 ; int ref_stride, 306 ; int ref_stride,
311 ; int *results) 307 ; int *results)
312 global sym(vp9_sad4x4x3_sse3) PRIVATE 308 global sym(vpx_sad4x4x3_sse3) PRIVATE
313 sym(vp9_sad4x4x3_sse3): 309 sym(vpx_sad4x4x3_sse3):
314 310
315 STACK_FRAME_CREATE_X3 311 STACK_FRAME_CREATE_X3
316 312
317 movd mm0, DWORD PTR [src_ptr] 313 movd mm0, DWORD PTR [src_ptr]
318 movd mm1, DWORD PTR [ref_ptr] 314 movd mm1, DWORD PTR [ref_ptr]
319 315
320 movd mm2, DWORD PTR [src_ptr+src_stride] 316 movd mm2, DWORD PTR [src_ptr+src_stride]
321 movd mm3, DWORD PTR [ref_ptr+ref_stride] 317 movd mm3, DWORD PTR [ref_ptr+ref_stride]
322 318
323 punpcklbw mm0, mm2 319 punpcklbw mm0, mm2
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
369 paddw mm7, mm5 365 paddw mm7, mm5
370 366
371 mov rcx, result_ptr 367 mov rcx, result_ptr
372 368
373 punpckldq mm1, mm3 369 punpckldq mm1, mm3
374 370
375 movq [rcx], mm1 371 movq [rcx], mm1
376 movd [rcx+8], mm7 372 movd [rcx+8], mm7
377 373
378 STACK_FRAME_DESTROY_X3 374 STACK_FRAME_DESTROY_X3
OLDNEW
« no previous file with comments | « source/libvpx/vpx_dsp/x86/sad_sse2.asm ('k') | source/libvpx/vpx_dsp/x86/sad_sse4.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698