OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
335 pxor mm5, mm5 ; Zero mm5 (sum of differences) | 335 pxor mm5, mm5 ; Zero mm5 (sum of differences) |
336 pxor mm6, mm6 ; Zero mm6 (zero operand for byte unpack) | 336 pxor mm6, mm6 ; Zero mm6 (zero operand for byte unpack) |
337 pxor mm7, mm7 ; Zero mm7 (sum of squared differences) | 337 pxor mm7, mm7 ; Zero mm7 (sum of squared differences) |
338 | 338 |
339 mov rax, arg(0) ;[src_ptr] ; Load base addresses | 339 mov rax, arg(0) ;[src_ptr] ; Load base addresses |
340 mov rbx, arg(2) ;[ref_ptr] | 340 mov rbx, arg(2) ;[ref_ptr] |
341 movsxd rcx, dword ptr arg(1) ;[source_stride] | 341 movsxd rcx, dword ptr arg(1) ;[source_stride] |
342 movsxd rdx, dword ptr arg(3) ;[recon_stride] | 342 movsxd rdx, dword ptr arg(3) ;[recon_stride] |
343 | 343 |
344 ; Row 1 | 344 ; Row 1 |
345 movq mm0, [rax] ; Copy eight bytes to mm0 | 345 movd mm0, [rax] ; Copy 4 bytes to mm0 |
346 movq mm1, [rbx] ; Copy eight bytes to mm1 | 346 movd mm1, [rbx] ; Copy 4 bytes to mm1 |
347 punpcklbw mm0, mm6 ; unpack to higher precision | 347 punpcklbw mm0, mm6 ; unpack to higher precision |
348 punpcklbw mm1, mm6 | 348 punpcklbw mm1, mm6 |
349 psubsw mm0, mm1 ; A-B (low order) to MM0 | 349 psubsw mm0, mm1 ; A-B (low order) to MM0 |
350 paddw mm5, mm0 ; accumulate differences in mm5 | 350 paddw mm5, mm0 ; accumulate differences in mm5 |
351 pmaddwd mm0, mm0 ; square and accumulate | 351 pmaddwd mm0, mm0 ; square and accumulate |
352 add rbx,rdx ; Inc pointer into ref data | 352 add rbx,rdx ; Inc pointer into ref data |
353 add rax,rcx ; Inc pointer into the new data | 353 add rax,rcx ; Inc pointer into the new data |
354 movq mm1, [rbx] ; Copy eight bytes to mm1 (ref data for row 2) | 354 movd mm1, [rbx] ; Copy 4 bytes to mm1 (ref data for row 2) |
355 paddd mm7, mm0 ; accumulate in mm7 | 355 paddd mm7, mm0 ; accumulate in mm7 |
356 | 356 |
357 | 357 |
358 ; Row 2 | 358 ; Row 2 |
359 movq mm0, [rax] ; Copy eight bytes to mm0 | 359 movd mm0, [rax] ; Copy 4 bytes to mm0 |
360 punpcklbw mm0, mm6 ; unpack to higher precision | 360 punpcklbw mm0, mm6 ; unpack to higher precision |
361 punpcklbw mm1, mm6 | 361 punpcklbw mm1, mm6 |
362 psubsw mm0, mm1 ; A-B (low order) to MM0 | 362 psubsw mm0, mm1 ; A-B (low order) to MM0 |
363 paddw mm5, mm0 ; accumulate differences in mm5 | 363 paddw mm5, mm0 ; accumulate differences in mm5 |
364 | 364 |
365 pmaddwd mm0, mm0 ; square and accumulate | 365 pmaddwd mm0, mm0 ; square and accumulate |
366 add rbx,rdx ; Inc pointer into ref data | 366 add rbx,rdx ; Inc pointer into ref data |
367 add rax,rcx ; Inc pointer into the new data | 367 add rax,rcx ; Inc pointer into the new data |
368 movq mm1, [rbx] ; Copy eight bytes to mm1 (ref data for row 3) | 368 movd mm1, [rbx] ; Copy 4 bytes to mm1 (ref data for row 3) |
369 paddd mm7, mm0 ; accumulate in mm7 | 369 paddd mm7, mm0 ; accumulate in mm7 |
370 | 370 |
371 ; Row 3 | 371 ; Row 3 |
372 movq mm0, [rax] ; Copy eight bytes to mm0 | 372 movd mm0, [rax] ; Copy 4 bytes to mm0 |
373 punpcklbw mm0, mm6 ; unpack to higher precision | 373 punpcklbw mm0, mm6 ; unpack to higher precision |
374 punpcklbw mm1, mm6 | 374 punpcklbw mm1, mm6 |
375 psubsw mm0, mm1 ; A-B (low order) to MM0 | 375 psubsw mm0, mm1 ; A-B (low order) to MM0 |
376 paddw mm5, mm0 ; accumulate differences in mm5 | 376 paddw mm5, mm0 ; accumulate differences in mm5 |
377 | 377 |
378 pmaddwd mm0, mm0 ; square and accumulate | 378 pmaddwd mm0, mm0 ; square and accumulate |
379 add rbx,rdx ; Inc pointer into ref data | 379 add rbx,rdx ; Inc pointer into ref data |
380 add rax,rcx ; Inc pointer into the new data | 380 add rax,rcx ; Inc pointer into the new data |
381 movq mm1, [rbx] ; Copy eight bytes to mm1 (ref data for row 4) | 381 movd mm1, [rbx] ; Copy 4 bytes to mm1 (ref data for row 4) |
382 paddd mm7, mm0 ; accumulate in mm7 | 382 paddd mm7, mm0 ; accumulate in mm7 |
383 | 383 |
384 ; Row 4 | 384 ; Row 4 |
385 movq mm0, [rax] ; Copy eight bytes to mm0 | 385 movd mm0, [rax] ; Copy 4 bytes to mm0 |
386 | 386 |
387 punpcklbw mm0, mm6 ; unpack to higher precision | 387 punpcklbw mm0, mm6 ; unpack to higher precision |
388 punpcklbw mm1, mm6 | 388 punpcklbw mm1, mm6 |
389 psubsw mm0, mm1 ; A-B (low order) to MM0 | 389 psubsw mm0, mm1 ; A-B (low order) to MM0 |
390 | 390 |
391 paddw mm5, mm0 ; accumulate differences in mm5 | 391 paddw mm5, mm0 ; accumulate differences in mm5 |
392 | 392 |
393 pmaddwd mm0, mm0 ; square and accumulate | 393 pmaddwd mm0, mm0 ; square and accumulate |
394 paddd mm7, mm0 ; accumulate in mm7 | 394 paddd mm7, mm0 ; accumulate in mm7 |
395 | 395 |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
501 movq rax, mm0 | 501 movq rax, mm0 |
502 | 502 |
503 | 503 |
504 ; begin epilog | 504 ; begin epilog |
505 pop rbx | 505 pop rbx |
506 pop rdi | 506 pop rdi |
507 pop rsi | 507 pop rsi |
508 UNSHADOW_ARGS | 508 UNSHADOW_ARGS |
509 pop rbp | 509 pop rbp |
510 ret | 510 ret |
OLD | NEW |