Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(389)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_impl_mmx.asm

Issue 554673004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
13
;unsigned int vp9_get_mb_ss_mmx( short *src_ptr )
;
; Sums the squares of the 256 signed 16-bit values starting at src_ptr
; (16 loop iterations, each consuming 16 words = 32 bytes).
;
; Register roles:
;   rax     = read pointer into src_ptr, advanced 32 bytes per iteration
;   rcx     = remaining iteration count
;   mm0-mm3 = scratch for the current 32 bytes
;   mm4     = two 32-bit partial sums of squares
;
; MMX state is left dirty: no EMMS is executed in this routine.
global sym(vp9_get_mb_ss_mmx) PRIVATE
sym(vp9_get_mb_ss_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    sub         rsp, 8                  ; 8-byte scratch slot used to spill mm4
    ; end prolog

        mov         rax, arg(0)         ;src_ptr
        mov         rcx, 16
        pxor        mm4, mm4            ; clear both partial sums

.NEXTROW:
        movq        mm0, [rax]
        movq        mm1, [rax+8]
        movq        mm2, [rax+16]
        movq        mm3, [rax+24]
        pmaddwd     mm0, mm0            ; square each word, add adjacent pairs
        pmaddwd     mm1, mm1
        pmaddwd     mm2, mm2
        pmaddwd     mm3, mm3

        paddd       mm4, mm0
        paddd       mm4, mm1
        paddd       mm4, mm2
        paddd       mm4, mm3

        add         rax, 32
        dec         rcx
        ; DEC updates ZF but leaves CF untouched, so branch on ZF alone.
        ; (The previous JA also tested the CF left behind by the ADD above.)
        jnz         .NEXTROW
        movq        QWORD PTR [rsp], mm4

        ;return sum[0]+sum[1];
        movsxd      rax, dword ptr [rsp]
        movsxd      rcx, dword ptr [rsp+4]
        add         rax, rcx


    ; begin epilog
    add         rsp, 8
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
64
65
;unsigned int vp9_get8x8var_mmx
;(
;    unsigned char *src_ptr,
;    int  source_stride,
;    unsigned char *ref_ptr,
;    int  recon_stride,
;    unsigned int *SSE,
;    int *Sum
;)
;
; For an 8x8 block of bytes, writes the sum of squared differences
; (src - ref)^2 to *SSE and the sum of differences (src - ref) to *Sum.
; Always returns 0.
;
; Register roles:
;   rax = current src row, rbx = current ref row
;   rcx = source_stride,   rdx = recon_stride (both sign-extended)
;   mm5 = four 16-bit partial sums of differences
;   mm6 = zero, used to widen bytes to words
;   mm7 = two 32-bit partial sums of squared differences
;
; MMX state is left dirty: no EMMS is executed in this routine.

; Processes one 8-byte row at [rax] / [rbx]; does not advance the pointers.
; Each 16-bit lane of mm5 receives two differences per row, each in
; [-255, 255], so eight rows cannot overflow a signed word.
%macro VP9_GET8X8VAR_MMX_ROW 0
        movq        mm0, [rax]          ; eight src bytes
        movq        mm1, [rbx]          ; eight ref bytes
        movq        mm2, mm0            ; keep copies for the high halves
        movq        mm3, mm1

        punpcklbw   mm0, mm6            ; low four bytes -> words
        punpcklbw   mm1, mm6
        punpckhbw   mm2, mm6            ; high four bytes -> words
        punpckhbw   mm3, mm6
        psubsw      mm0, mm1            ; src - ref (low half)
        psubsw      mm2, mm3            ; src - ref (high half)

        paddw       mm5, mm0            ; accumulate differences
        paddw       mm5, mm2

        pmaddwd     mm0, mm0            ; square and pair-wise add
        pmaddwd     mm2, mm2
        paddd       mm7, mm0            ; accumulate squared differences
        paddd       mm7, mm2
%endmacro

global sym(vp9_get8x8var_mmx) PRIVATE
sym(vp9_get8x8var_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    push        rbx
    sub         rsp, 16                 ; scratch: [rsp] = mm7, [rsp+8] = mm5
    ; end prolog


        pxor        mm5, mm5            ; clear difference accumulator
        pxor        mm6, mm6            ; zero for unpacking
        pxor        mm7, mm7            ; clear squared-difference accumulator

        mov         rax, arg(0)         ;[src_ptr]  ; Load base addresses
        mov         rbx, arg(2)         ;[ref_ptr]
        movsxd      rcx, dword ptr arg(1) ;[source_stride]
        movsxd      rdx, dword ptr arg(3) ;[recon_stride]

        ; Rows 1-7: process, then step both pointers to the next row
%rep 7
        VP9_GET8X8VAR_MMX_ROW
        add         rbx, rdx            ; next ref row
        add         rax, rcx            ; next src row
%endrep

        ; Row 8: no pointer advance needed afterwards
        VP9_GET8X8VAR_MMX_ROW

        ; Now accumulate the final results.
        movq        QWORD PTR [rsp+8], mm5 ; spill the four word sums
        movq        QWORD PTR [rsp], mm7   ; spill the two dword sums
        movsx       rdx, WORD PTR [rsp+8]
        movsx       rcx, WORD PTR [rsp+10]
        movsx       rbx, WORD PTR [rsp+12]
        movsx       rax, WORD PTR [rsp+14]
        add         rdx, rcx
        add         rbx, rax
        add         rdx, rbx            ;XSum
        movsxd      rax, DWORD PTR [rsp]
        movsxd      rcx, DWORD PTR [rsp+4]
        add         rax, rcx            ;XXSum
        mov         rsi, arg(4)         ;SSE
        mov         rdi, arg(5)         ;Sum
        mov         dword ptr [rsi], eax
        mov         dword ptr [rdi], edx
        xor         rax, rax            ; return 0


    ; begin epilog
    add         rsp, 16
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
310
311
312
;unsigned int
;vp9_get4x4var_mmx
;(
;    unsigned char *src_ptr,
;    int  source_stride,
;    unsigned char *ref_ptr,
;    int  recon_stride,
;    unsigned int *SSE,
;    int *Sum
;)
;
; For a 4x4 block of bytes, writes the sum of squared differences
; (src - ref)^2 to *SSE and the sum of differences (src - ref) to *Sum.
; Always returns 0.
;
; Register roles:
;   rax = current src row, rbx = current ref row
;   rcx = source_stride,   rdx = recon_stride (both sign-extended)
;   mm5 = four 16-bit partial sums of differences
;   mm6 = zero, used to widen bytes to words
;   mm7 = two 32-bit partial sums of squared differences
;
; MMX state is left dirty: no EMMS is executed in this routine.

; Processes one 4-byte row at [rax] / [rbx]; does not advance the pointers.
%macro VP9_GET4X4VAR_MMX_ROW 0
        movd        mm0, [rax]          ; four src bytes
        movd        mm1, [rbx]          ; four ref bytes
        punpcklbw   mm0, mm6            ; bytes -> words
        punpcklbw   mm1, mm6
        psubsw      mm0, mm1            ; src - ref
        paddw       mm5, mm0            ; accumulate differences
        pmaddwd     mm0, mm0            ; square and pair-wise add
        paddd       mm7, mm0            ; accumulate squared differences
%endmacro

global sym(vp9_get4x4var_mmx) PRIVATE
sym(vp9_get4x4var_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    push        rbx
    sub         rsp, 16                 ; scratch: [rsp] = mm7, [rsp+8] = mm5
    ; end prolog


        pxor        mm5, mm5            ; clear difference accumulator
        pxor        mm6, mm6            ; zero for unpacking
        pxor        mm7, mm7            ; clear squared-difference accumulator

        mov         rax, arg(0)         ;[src_ptr]  ; Load base addresses
        mov         rbx, arg(2)         ;[ref_ptr]
        movsxd      rcx, dword ptr arg(1) ;[source_stride]
        movsxd      rdx, dword ptr arg(3) ;[recon_stride]

        ; Rows 1-3: process, then step both pointers to the next row
%rep 3
        VP9_GET4X4VAR_MMX_ROW
        add         rbx, rdx            ; next ref row
        add         rax, rcx            ; next src row
%endrep

        ; Row 4: no pointer advance needed afterwards
        VP9_GET4X4VAR_MMX_ROW


        ; Now accumulate the final results.
        movq        QWORD PTR [rsp+8], mm5 ; spill the four word sums
        movq        QWORD PTR [rsp], mm7   ; spill the two dword sums
        movsx       rdx, WORD PTR [rsp+8]
        movsx       rcx, WORD PTR [rsp+10]
        movsx       rbx, WORD PTR [rsp+12]
        movsx       rax, WORD PTR [rsp+14]
        add         rdx, rcx
        add         rbx, rax
        add         rdx, rbx            ;XSum
        movsxd      rax, DWORD PTR [rsp]
        movsxd      rcx, DWORD PTR [rsp+4]
        add         rax, rcx            ;XXSum
        mov         rsi, arg(4)         ;SSE
        mov         rdi, arg(5)         ;Sum
        mov         dword ptr [rsi], eax
        mov         dword ptr [rdi], edx
        xor         rax, rax            ; return 0


    ; begin epilog
    add         rsp, 16
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
425
426
427
;unsigned int
;vp9_get4x4sse_cs_mmx
;(
;    unsigned char *src_ptr,
;    int  source_stride,
;    unsigned char *ref_ptr,
;    int  recon_stride
;)
;
; Returns the sum of squared differences (src - ref)^2 over a 4x4 block
; of bytes.
;
; Register roles:
;   rax = current src row, rbx = current ref row
;   rcx = source_stride,   rdx = recon_stride (both sign-extended)
;   mm6 = zero, used to widen bytes to words
;   mm7 = two 32-bit partial sums of squared differences
;
; MMX state is left dirty: no EMMS is executed in this routine.

; Processes one 4-byte row at [rax] / [rbx]; does not advance the pointers.
%macro VP9_GET4X4SSE_CS_MMX_ROW 0
        movd        mm0, [rax]          ; four src bytes
        movd        mm1, [rbx]          ; four ref bytes
        punpcklbw   mm0, mm6            ; bytes -> words
        punpcklbw   mm1, mm6
        psubsw      mm0, mm1            ; src - ref
        pmaddwd     mm0, mm0            ; square and pair-wise add
        paddd       mm7, mm0            ; accumulate squared differences
%endmacro

global sym(vp9_get4x4sse_cs_mmx) PRIVATE
sym(vp9_get4x4sse_cs_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    push        rbx
    ; end prolog


        pxor        mm6, mm6            ; zero for unpacking
        pxor        mm7, mm7            ; clear squared-difference accumulator

        mov         rax, arg(0)         ;[src_ptr]  ; Load base addresses
        mov         rbx, arg(2)         ;[ref_ptr]
        movsxd      rcx, dword ptr arg(1) ;[source_stride]
        movsxd      rdx, dword ptr arg(3) ;[recon_stride]

        ; Rows 1-3: process, then step both pointers to the next row
%rep 3
        VP9_GET4X4SSE_CS_MMX_ROW
        add         rbx, rdx            ; next ref row
        add         rax, rcx            ; next src row
%endrep

        ; Row 4: no pointer advance needed afterwards
        VP9_GET4X4SSE_CS_MMX_ROW

        ; Fold the two dword partial sums: low dword of mm0 = lo + hi.
        movq        mm0, mm7
        psrlq       mm7, 32

        paddd       mm0, mm7
        ; The low 32 bits carry the result. The high dword of mm0 still
        ; holds the upper partial sum, so bits above 31 of a 64-bit rax
        ; are not zero.
        ; NOTE(review): presumably callers read only the 32-bit
        ; unsigned int return value - confirm.
        movq        rax, mm0


    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_variance_impl_sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698