Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_sad_mmx.asm

Issue 554673004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11 ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not defined in this file; presumably they come from the include below -- confirm
12 %include "vpx_ports/x86_abi_support.asm"
13 ; Exported entry points: one SAD (sum of absolute differences) routine per block size
14 global sym(vp9_sad16x16_mmx) PRIVATE
15 global sym(vp9_sad8x16_mmx) PRIVATE
16 global sym(vp9_sad8x8_mmx) PRIVATE
17 global sym(vp9_sad4x4_mmx) PRIVATE
18 global sym(vp9_sad16x8_mmx) PRIVATE
19 ; vp9_sad16x16_mmx: sums |src - ref| over 16 rows of 16 bytes each; rows are src_stride / ref_stride bytes apart. Result is returned in rax.
20 ;unsigned int vp9_sad16x16_mmx(
21 ; unsigned char *src_ptr,
22 ; int src_stride,
23 ; unsigned char *ref_ptr,
24 ; int ref_stride)
25 sym(vp9_sad16x16_mmx):
26 push rbp
27 mov rbp, rsp
28 SHADOW_ARGS_TO_STACK 4 ; macro from outside this file; presumably makes the 4 arguments readable through arg(n) -- confirm
29 push rsi
30 push rdi
31 ; end prolog
32 ; rsi walks the source rows, rdi walks the reference rows
33 mov rsi, arg(0) ;src_ptr
34 mov rdi, arg(2) ;ref_ptr
35 ; strides are 32-bit ints, sign-extended so they can be used in 64-bit address arithmetic
36 movsxd rax, dword ptr arg(1) ;src_stride
37 movsxd rdx, dword ptr arg(3) ;ref_stride
38 ; rcx = src_ptr + 16*src_stride, built as two steps of 8*stride; it is the loop's end pointer
39 lea rcx, [rsi+rax*8]
40
41 lea rcx, [rcx+rax*8]
42 pxor mm7, mm7 ; mm7 = running sums, four 16-bit lanes
43
44 pxor mm6, mm6 ; mm6 = constant zero, used as the unpack partner to widen bytes/words
45
46 .x16x16sad_mmx_loop: ; one pass per row
47
48 movq mm0, QWORD PTR [rsi] ; src bytes 0..7
49 movq mm2, QWORD PTR [rsi+8] ; src bytes 8..15
50
51 movq mm1, QWORD PTR [rdi] ; ref bytes 0..7
52 movq mm3, QWORD PTR [rdi+8] ; ref bytes 8..15
53
54 movq mm4, mm0 ; keep copies of src: psubusb below overwrites mm0/mm2
55 movq mm5, mm2
56 ; |a-b| per byte = (a -sat b) | (b -sat a): one of the two saturating differences is always 0
57 psubusb mm0, mm1
58 psubusb mm1, mm4
59
60 psubusb mm2, mm3
61 psubusb mm3, mm5
62
63 por mm0, mm1 ; mm0 = abs diffs, bytes 0..7
64 por mm2, mm3 ; mm2 = abs diffs, bytes 8..15
65
66 movq mm1, mm0
67 movq mm3, mm2
68 ; widen bytes to words against the zero register so the sums do not wrap at 255
69 punpcklbw mm0, mm6
70 punpcklbw mm2, mm6
71
72 punpckhbw mm1, mm6
73 punpckhbw mm3, mm6
74
75 paddw mm0, mm2
76 paddw mm1, mm3
77
78 ; advance both row pointers by their own stride
79 lea rsi, [rsi+rax]
80 add rdi, rdx
81 ; each word lane of mm7 gains 4 abs diffs per row (at most 4*255), so 16 rows stay within 16 bits
82 paddw mm7, mm0
83 paddw mm7, mm1
84
85 cmp rsi, rcx ; exit is an exact pointer match with the end pointer
86 jne .x16x16sad_mmx_loop
87
88 ; horizontal reduction of the four word lanes of mm7 into one total
89 movq mm0, mm7
90
91 punpcklwd mm0, mm6 ; lanes 0,1 widened to dwords
92 punpckhwd mm7, mm6 ; lanes 2,3 widened to dwords
93
94 paddw mm0, mm7 ; dword0 = lane0+lane2, dword1 = lane1+lane3 (upper words are zero)
95 movq mm7, mm0
96
97
98 psrlq mm0, 32 ; bring dword1 down to the low dword
99 paddw mm7, mm0 ; low dword of mm7 = full total; a 16x16 block sums to at most 256*255 = 65280, which fits the word add
100 ; the high dword of mm7 still holds the lane1+lane3 partial sum, so only the low 32 bits of rax are the result
101 movq rax, mm7
102 ; NOTE(review): no emms is executed in this routine; presumably the caller resets MMX state -- confirm
103 pop rdi
104 pop rsi
105 mov rsp, rbp
106 ; begin epilog
107 UNSHADOW_ARGS
108 pop rbp
109 ret
110
111 ; vp9_sad8x16_mmx: sums |src - ref| over 16 rows of 8 bytes each; rows are src_stride / ref_stride bytes apart. Result is returned in rax.
112 ;unsigned int vp9_sad8x16_mmx(
113 ; unsigned char *src_ptr,
114 ; int src_stride,
115 ; unsigned char *ref_ptr,
116 ; int ref_stride)
117 sym(vp9_sad8x16_mmx):
118 push rbp
119 mov rbp, rsp
120 SHADOW_ARGS_TO_STACK 4 ; macro from outside this file; presumably makes the 4 arguments readable through arg(n) -- confirm
121 push rsi
122 push rdi
123 ; end prolog
124 ; rsi walks the source rows, rdi walks the reference rows
125 mov rsi, arg(0) ;src_ptr
126 mov rdi, arg(2) ;ref_ptr
127 ; strides are 32-bit ints, sign-extended for 64-bit address arithmetic
128 movsxd rax, dword ptr arg(1) ;src_stride
129 movsxd rdx, dword ptr arg(3) ;ref_stride
130 ; rcx = src_ptr + 16*src_stride (two steps of 8*stride): the loop's end pointer
131 lea rcx, [rsi+rax*8]
132
133 lea rcx, [rcx+rax*8]
134 pxor mm7, mm7 ; mm7 = running sums, four 16-bit lanes
135
136 pxor mm6, mm6 ; mm6 = constant zero used for widening unpacks
137
138 .x8x16sad_mmx_loop: ; one pass per 8-byte row
139
140 movq mm0, QWORD PTR [rsi] ; 8 src bytes
141 movq mm1, QWORD PTR [rdi] ; 8 ref bytes
142
143 movq mm2, mm0 ; copy of src, needed for the reverse difference
144 psubusb mm0, mm1 ; src -sat ref
145
146 psubusb mm1, mm2 ; ref -sat src
147 por mm0, mm1 ; per-byte |src - ref| (one of the two differences is always 0)
148
149 movq mm2, mm0
150 punpcklbw mm0, mm6 ; low 4 abs diffs widened to words
151
152 punpckhbw mm2, mm6 ; high 4 abs diffs widened to words
153 lea rsi, [rsi+rax] ; next source row
154
155 add rdi, rdx ; next reference row
156 paddw mm7, mm0
157
158 paddw mm7, mm2
159 cmp rsi, rcx ; exit is an exact pointer match with the end pointer
160
161 jne .x8x16sad_mmx_loop
162 ; horizontal reduction of the four word lanes of mm7 into one total
163 movq mm0, mm7
164 punpcklwd mm0, mm6 ; lanes 0,1 widened to dwords
165
166 punpckhwd mm7, mm6 ; lanes 2,3 widened to dwords
167 paddw mm0, mm7 ; dword0 = lane0+lane2, dword1 = lane1+lane3
168
169 movq mm7, mm0
170 psrlq mm0, 32 ; bring dword1 down to the low dword
171
172 paddw mm7, mm0 ; low dword of mm7 = full total; high dword still holds a partial sum
173 movq rax, mm7 ; only the low 32 bits of rax are the result
174 ; NOTE(review): no emms is executed in this routine; presumably the caller resets MMX state -- confirm
175 pop rdi
176 pop rsi
177 mov rsp, rbp
178 ; begin epilog
179 UNSHADOW_ARGS
180 pop rbp
181 ret
182
183 ; vp9_sad8x8_mmx: sums |src - ref| over 8 rows of 8 bytes each; rows are src_stride / ref_stride bytes apart. Result is returned in rax.
184 ;unsigned int vp9_sad8x8_mmx(
185 ; unsigned char *src_ptr,
186 ; int src_stride,
187 ; unsigned char *ref_ptr,
188 ; int ref_stride)
189 sym(vp9_sad8x8_mmx):
190 push rbp
191 mov rbp, rsp
192 SHADOW_ARGS_TO_STACK 4 ; macro from outside this file; presumably makes the 4 arguments readable through arg(n) -- confirm
193 push rsi
194 push rdi
195 ; end prolog
196 ; rsi walks the source rows, rdi walks the reference rows
197 mov rsi, arg(0) ;src_ptr
198 mov rdi, arg(2) ;ref_ptr
199 ; strides are 32-bit ints, sign-extended for 64-bit address arithmetic
200 movsxd rax, dword ptr arg(1) ;src_stride
201 movsxd rdx, dword ptr arg(3) ;ref_stride
202 ; rcx = src_ptr + 8*src_stride: the loop's end pointer
203 lea rcx, [rsi+rax*8]
204 pxor mm7, mm7 ; mm7 = running sums, four 16-bit lanes
205
206 pxor mm6, mm6 ; mm6 = constant zero used for widening unpacks
207
208 .x8x8sad_mmx_loop: ; one pass per 8-byte row
209
210 movq mm0, QWORD PTR [rsi] ; 8 src bytes
211 movq mm1, QWORD PTR [rdi] ; 8 ref bytes
212
213 movq mm2, mm0 ; copy of src, needed for the reverse difference
214 psubusb mm0, mm1 ; src -sat ref
215
216 psubusb mm1, mm2 ; ref -sat src
217 por mm0, mm1 ; per-byte |src - ref| (one of the two differences is always 0)
218
219 movq mm2, mm0
220 punpcklbw mm0, mm6 ; low 4 abs diffs widened to words
221
222 punpckhbw mm2, mm6 ; high 4 abs diffs widened to words
223 paddw mm0, mm2 ; row contribution: 2 abs diffs per word lane
224
225 lea rsi, [rsi+rax] ; next source row
226 add rdi, rdx ; next reference row
227
228 paddw mm7, mm0
229 cmp rsi, rcx ; exit is an exact pointer match with the end pointer
230
231 jne .x8x8sad_mmx_loop
232 ; horizontal reduction of the four word lanes of mm7 into one total
233 movq mm0, mm7
234 punpcklwd mm0, mm6 ; lanes 0,1 widened to dwords
235
236 punpckhwd mm7, mm6 ; lanes 2,3 widened to dwords
237 paddw mm0, mm7 ; dword0 = lane0+lane2, dword1 = lane1+lane3
238
239 movq mm7, mm0
240 psrlq mm0, 32 ; bring dword1 down to the low dword
241
242 paddw mm7, mm0 ; low dword of mm7 = full total; high dword still holds a partial sum
243 movq rax, mm7 ; only the low 32 bits of rax are the result
244 ; NOTE(review): no emms is executed in this routine; presumably the caller resets MMX state -- confirm
245 pop rdi
246 pop rsi
247 mov rsp, rbp
248 ; begin epilog
249 UNSHADOW_ARGS
250 pop rbp
251 ret
252
253 ; vp9_sad4x4_mmx: sums |src - ref| over 4 rows of 4 bytes each, fully unrolled (no loop): rows are processed two at a time. Result is returned in rax.
254 ;unsigned int vp9_sad4x4_mmx(
255 ; unsigned char *src_ptr,
256 ; int src_stride,
257 ; unsigned char *ref_ptr,
258 ; int ref_stride)
259 sym(vp9_sad4x4_mmx):
260 push rbp
261 mov rbp, rsp
262 SHADOW_ARGS_TO_STACK 4 ; macro from outside this file; presumably makes the 4 arguments readable through arg(n) -- confirm
263 push rsi
264 push rdi
265 ; end prolog
266 ; rsi = current source row, rdi = current reference row
267 mov rsi, arg(0) ;src_ptr
268 mov rdi, arg(2) ;ref_ptr
269 ; strides are 32-bit ints, sign-extended for 64-bit address arithmetic
270 movsxd rax, dword ptr arg(1) ;src_stride
271 movsxd rdx, dword ptr arg(3) ;ref_stride
272 ; rows 0 and 1: load 4 bytes from each row of src and ref
273 movd mm0, DWORD PTR [rsi]
274 movd mm1, DWORD PTR [rdi]
275
276 movd mm2, DWORD PTR [rsi+rax]
277 movd mm3, DWORD PTR [rdi+rdx]
278 ; interleave the two rows into one 8-byte register; src and ref use the same interleave, so matching pixels stay in matching byte lanes
279 punpcklbw mm0, mm2
280 punpcklbw mm1, mm3
281
282 movq mm2, mm0 ; copy of src, needed for the reverse difference
283 psubusb mm0, mm1 ; src -sat ref
284
285 psubusb mm1, mm2 ; ref -sat src
286 por mm0, mm1 ; per-byte |src - ref| for rows 0 and 1
287
288 movq mm2, mm0
289 pxor mm3, mm3 ; mm3 becomes the constant zero from here to the end of the routine
290
291 punpcklbw mm0, mm3 ; widen abs diffs to words
292 punpckhbw mm2, mm3
293
294 paddw mm0, mm2 ; mm0 = four word lanes of partial sums for rows 0-1
295 ; step both pointers down two rows
296 lea rsi, [rsi+rax*2]
297 lea rdi, [rdi+rdx*2]
298 ; rows 2 and 3: same sequence in mm4..mm7
299 movd mm4, DWORD PTR [rsi]
300 movd mm5, DWORD PTR [rdi]
301
302 movd mm6, DWORD PTR [rsi+rax]
303 movd mm7, DWORD PTR [rdi+rdx]
304
305 punpcklbw mm4, mm6
306 punpcklbw mm5, mm7
307
308 movq mm6, mm4 ; copy of src, needed for the reverse difference
309 psubusb mm4, mm5
310
311 psubusb mm5, mm6
312 por mm4, mm5 ; per-byte |src - ref| for rows 2 and 3
313
314 movq mm5, mm4
315 punpcklbw mm4, mm3 ; widen against the zero in mm3
316
317 punpckhbw mm5, mm3
318 paddw mm4, mm5
319
320 paddw mm0, mm4 ; combine both row pairs: four word lanes of partial sums
321 movq mm1, mm0
322 ; horizontal reduction of the four word lanes into one total
323 punpcklwd mm0, mm3 ; lanes 0,1 widened to dwords
324 punpckhwd mm1, mm3 ; lanes 2,3 widened to dwords
325
326 paddw mm0, mm1 ; dword0 = lane0+lane2, dword1 = lane1+lane3
327 movq mm1, mm0
328
329 psrlq mm0, 32 ; bring dword1 down to the low dword
330 paddw mm0, mm1 ; low dword of mm0 = full total; high dword still holds a partial sum
331
332 movq rax, mm0 ; only the low 32 bits of rax are the result
333 ; NOTE(review): no emms is executed in this routine; presumably the caller resets MMX state -- confirm
334 pop rdi
335 pop rsi
336 mov rsp, rbp
337 ; begin epilog
338 UNSHADOW_ARGS
339 pop rbp
340 ret
341
342 ; vp9_sad16x8_mmx: sums |src - ref| over 8 rows of 16 bytes each; rows are src_stride / ref_stride bytes apart. Result is returned in rax.
343 ;unsigned int vp9_sad16x8_mmx(
344 ; unsigned char *src_ptr,
345 ; int src_stride,
346 ; unsigned char *ref_ptr,
347 ; int ref_stride)
348 sym(vp9_sad16x8_mmx):
349 push rbp
350 mov rbp, rsp
351 SHADOW_ARGS_TO_STACK 4 ; macro from outside this file; presumably makes the 4 arguments readable through arg(n) -- confirm
352 push rsi
353 push rdi
354 ; end prolog
355 ; rsi walks the source rows, rdi walks the reference rows
356 mov rsi, arg(0) ;src_ptr
357 mov rdi, arg(2) ;ref_ptr
358 ; strides are 32-bit ints, sign-extended for 64-bit address arithmetic
359 movsxd rax, dword ptr arg(1) ;src_stride
360 movsxd rdx, dword ptr arg(3) ;ref_stride
361 ; rcx = src_ptr + 8*src_stride: the loop's end pointer
362 lea rcx, [rsi+rax*8]
363 pxor mm7, mm7 ; mm7 = running sums, four 16-bit lanes
364
365 pxor mm6, mm6 ; mm6 = constant zero used for widening unpacks
366
367 .x16x8sad_mmx_loop: ; one pass per 16-byte row
368
369 movq mm0, [rsi] ; src bytes 0..7
370 movq mm1, [rdi] ; ref bytes 0..7
371
372 movq mm2, [rsi+8] ; src bytes 8..15
373 movq mm3, [rdi+8] ; ref bytes 8..15
374
375 movq mm4, mm0 ; keep copies of src: psubusb below overwrites mm0/mm2
376 movq mm5, mm2
377 ; |a-b| per byte = (a -sat b) | (b -sat a): one of the two saturating differences is always 0
378 psubusb mm0, mm1
379 psubusb mm1, mm4
380
381 psubusb mm2, mm3
382 psubusb mm3, mm5
383
384 por mm0, mm1 ; mm0 = abs diffs, bytes 0..7
385 por mm2, mm3 ; mm2 = abs diffs, bytes 8..15
386
387 movq mm1, mm0
388 movq mm3, mm2
389 ; widen bytes to words against the zero register so the sums do not wrap at 255
390 punpcklbw mm0, mm6
391 punpckhbw mm1, mm6
392
393 punpcklbw mm2, mm6
394 punpckhbw mm3, mm6
395
396
397 paddw mm0, mm2
398 paddw mm1, mm3
399
400 paddw mm0, mm1 ; row contribution: 4 abs diffs per word lane
401 lea rsi, [rsi+rax] ; next source row
402
403 add rdi, rdx ; next reference row
404 paddw mm7, mm0
405
406 cmp rsi, rcx ; exit is an exact pointer match with the end pointer
407 jne .x16x8sad_mmx_loop
408 ; horizontal reduction of the four word lanes of mm7 into one total
409 movq mm0, mm7
410 punpcklwd mm0, mm6 ; lanes 0,1 widened to dwords
411
412 punpckhwd mm7, mm6 ; lanes 2,3 widened to dwords
413 paddw mm0, mm7 ; dword0 = lane0+lane2, dword1 = lane1+lane3
414
415 movq mm7, mm0
416 psrlq mm0, 32 ; bring dword1 down to the low dword
417
418 paddw mm7, mm0 ; low dword of mm7 = full total; high dword still holds a partial sum
419 movq rax, mm7 ; only the low 32 bits of rax are the result
420 ; NOTE(review): no emms is executed in this routine; presumably the caller resets MMX state -- confirm
421 pop rdi
422 pop rsi
423 mov rsp, rbp
424 ; begin epilog
425 UNSHADOW_ARGS
426 pop rbp
427 ret
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698