OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
335 pxor mm5, mm5 ; Blank mmx6 | 335 pxor mm5, mm5 ; Blank mm5 (difference accumulator) |
336 pxor mm6, mm6 ; Blank mmx7 | 336 pxor mm6, mm6 ; Blank mm6 (zero operand for byte unpacking) |
337 pxor mm7, mm7 ; Blank mmx7 | 337 pxor mm7, mm7 ; Blank mm7 (squared-difference accumulator) |
338 | 338 |
339 mov rax, arg(0) ;[src_ptr] ; Load base addresses | 339 mov rax, arg(0) ;[src_ptr] ; Load base addresses |
340 mov rbx, arg(2) ;[ref_ptr] | 340 mov rbx, arg(2) ;[ref_ptr] |
341 movsxd rcx, dword ptr arg(1) ;[source_stride] | 341 movsxd rcx, dword ptr arg(1) ;[source_stride] |
342 movsxd rdx, dword ptr arg(3) ;[recon_stride] | 342 movsxd rdx, dword ptr arg(3) ;[recon_stride] |
343 | 343 |
344 ; Row 1 | 344 ; Row 1 |
345 movq mm0, [rax] ; Copy eight bytes to mm0 | 345 movd mm0, [rax] ; Copy four bytes to mm0 |
346 movq mm1, [rbx] ; Copy eight bytes to mm1 | 346 movd mm1, [rbx] ; Copy four bytes to mm1 |
347 punpcklbw mm0, mm6 ; unpack to higher prrcision | 347 punpcklbw mm0, mm6 ; unpack to higher precision |
348 punpcklbw mm1, mm6 | 348 punpcklbw mm1, mm6 |
349 psubsw mm0, mm1 ; A-B (low order) to MM0 | 349 psubsw mm0, mm1 ; A-B (low order) to MM0 |
350 paddw mm5, mm0 ; accumulate differences in mm5 | 350 paddw mm5, mm0 ; accumulate differences in mm5 |
351 pmaddwd mm0, mm0 ; square and accumulate | 351 pmaddwd mm0, mm0 ; square and accumulate |
352 add rbx,rdx ; Inc pointer into ref data | 352 add rbx,rdx ; Inc pointer into ref data |
353 add rax,rcx ; Inc pointer into the new data | 353 add rax,rcx ; Inc pointer into the new data |
354 movq mm1, [rbx] ; Copy eight bytes to mm1 | 354 movd mm1, [rbx] ; Copy four bytes to mm1 |
355 paddd mm7, mm0 ; accumulate in mm7 | 355 paddd mm7, mm0 ; accumulate in mm7 |
356 | 356 |
357 | 357 |
358 ; Row 2 | 358 ; Row 2 |
359 movq mm0, [rax] ; Copy eight bytes to mm0 | 359 movd mm0, [rax] ; Copy four bytes to mm0 |
360 punpcklbw mm0, mm6 ; unpack to higher prrcision | 360 punpcklbw mm0, mm6 ; unpack to higher precision |
361 punpcklbw mm1, mm6 | 361 punpcklbw mm1, mm6 |
362 psubsw mm0, mm1 ; A-B (low order) to MM0 | 362 psubsw mm0, mm1 ; A-B (low order) to MM0 |
363 paddw mm5, mm0 ; accumulate differences in mm5 | 363 paddw mm5, mm0 ; accumulate differences in mm5 |
364 | 364 |
365 pmaddwd mm0, mm0 ; square and accumulate | 365 pmaddwd mm0, mm0 ; square and accumulate |
366 add rbx,rdx ; Inc pointer into ref data | 366 add rbx,rdx ; Inc pointer into ref data |
367 add rax,rcx ; Inc pointer into the new data | 367 add rax,rcx ; Inc pointer into the new data |
368 movq mm1, [rbx] ; Copy eight bytes to mm1 | 368 movd mm1, [rbx] ; Copy four bytes to mm1 |
369 paddd mm7, mm0 ; accumulate in mm7 | 369 paddd mm7, mm0 ; accumulate in mm7 |
370 | 370 |
371 ; Row 3 | 371 ; Row 3 |
372 movq mm0, [rax] ; Copy eight bytes to mm0 | 372 movd mm0, [rax] ; Copy four bytes to mm0 |
373 punpcklbw mm0, mm6 ; unpack to higher prrcision | 373 punpcklbw mm0, mm6 ; unpack to higher precision |
374 punpcklbw mm1, mm6 | 374 punpcklbw mm1, mm6 |
375 psubsw mm0, mm1 ; A-B (low order) to MM0 | 375 psubsw mm0, mm1 ; A-B (low order) to MM0 |
376 paddw mm5, mm0 ; accumulate differences in mm5 | 376 paddw mm5, mm0 ; accumulate differences in mm5 |
377 | 377 |
378 pmaddwd mm0, mm0 ; square and accumulate | 378 pmaddwd mm0, mm0 ; square and accumulate |
379 add rbx,rdx ; Inc pointer into ref data | 379 add rbx,rdx ; Inc pointer into ref data |
380 add rax,rcx ; Inc pointer into the new data | 380 add rax,rcx ; Inc pointer into the new data |
381 movq mm1, [rbx] ; Copy eight bytes to mm1 | 381 movd mm1, [rbx] ; Copy four bytes to mm1 |
382 paddd mm7, mm0 ; accumulate in mm7 | 382 paddd mm7, mm0 ; accumulate in mm7 |
383 | 383 |
384 ; Row 4 | 384 ; Row 4 |
385 movq mm0, [rax] ; Copy eight bytes to mm0 | 385 movd mm0, [rax] ; Copy four bytes to mm0 |
386 | 386 |
387 punpcklbw mm0, mm6 ; unpack to higher prrcision | 387 punpcklbw mm0, mm6 ; unpack to higher precision |
388 punpcklbw mm1, mm6 | 388 punpcklbw mm1, mm6 |
389 psubsw mm0, mm1 ; A-B (low order) to MM0 | 389 psubsw mm0, mm1 ; A-B (low order) to MM0 |
390 | 390 |
391 paddw mm5, mm0 ; accumulate differences in mm5 | 391 paddw mm5, mm0 ; accumulate differences in mm5 |
392 | 392 |
393 pmaddwd mm0, mm0 ; square and accumulate | 393 pmaddwd mm0, mm0 ; square and accumulate |
394 paddd mm7, mm0 ; accumulate in mm7 | 394 paddd mm7, mm0 ; accumulate in mm7 |
395 | 395 |
(...skipping 446 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
842 UNSHADOW_ARGS | 842 UNSHADOW_ARGS |
843 pop rbp | 843 pop rbp |
844 ret | 844 ret |
845 | 845 |
846 | 846 |
847 SECTION_RODATA | 847 SECTION_RODATA |
848 ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; | 848 ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; |
849 align 16 | 849 align 16 |
850 mmx_bi_rd: | 850 mmx_bi_rd: |
851 times 4 dw 64 | 851 times 4 dw 64 |
OLD | NEW |