| OLD | NEW |
| 1 ;****************************************************************************** | 1 ;****************************************************************************** |
| 2 ;* VC1 deblocking optimizations | 2 ;* VC1 deblocking optimizations |
| 3 ;* Copyright (c) 2009 David Conrad | 3 ;* Copyright (c) 2009 David Conrad |
| 4 ;* | 4 ;* |
| 5 ;* This file is part of FFmpeg. | 5 ;* This file is part of FFmpeg. |
| 6 ;* | 6 ;* |
| 7 ;* FFmpeg is free software; you can redistribute it and/or | 7 ;* FFmpeg is free software; you can redistribute it and/or |
| 8 ;* modify it under the terms of the GNU Lesser General Public | 8 ;* modify it under the terms of the GNU Lesser General Public |
| 9 ;* License as published by the Free Software Foundation; either | 9 ;* License as published by the Free Software Foundation; either |
| 10 ;* version 2.1 of the License, or (at your option) any later version. | 10 ;* version 2.1 of the License, or (at your option) any later version. |
| (... 18 unchanged lines omitted ...) | (... 18 unchanged lines omitted ...) |
| 29 | 29 |
; dst_low, dst_high (src), zero
; zero-extends one vector from 8 to 16 bits
; %1: unpack size suffix (bw/wd/dq), %2: dst_low register number,
; %3: src register number (becomes dst_high), %4: zeroed register number
; NOTE: %4 must already hold all-zeros; interleaving with zeros is what
; makes this a zero-extension rather than a plain unpack.
%macro UNPACK_8TO16 4
    mova      m%2, m%3            ; copy src so both halves survive
    punpckh%1 m%3, m%4            ; high elements -> m%3, widened with zeros
    punpckl%1 m%2, m%4            ; low elements  -> m%2, widened with zeros
%endmacro
| 37 | 37 |
; %1-%4: four (possibly unaligned) word-sized memory destinations
; %5:    mmx/xmm source register holding the words in its low two dwords
; %6:    GPR scratch (x264asm register token; %6d/%6w select 32/16-bit views)
; Scatters the low 4 words of %5 to four separate memory locations,
; going through a GPR because mmx/sse (pre-SSE4) cannot store words directly.
%macro STORE_4_WORDS_MMX 6
    movd  %6d, %5                 ; words 0-1 -> scratch GPR (32-bit write
                                  ; zero-clears the upper half on x86-64)
%if mmsize==16
    psrldq %5, 4                  ; xmm: shift next dword into position
%else
    psrlq  %5, 32                 ; mmx: shift next dword into position
%endif
    mov   %1, %6w                 ; store word 0
    shr   %6, 16                  ; expose word 1
    mov   %2, %6w                 ; store word 1
    movd  %6d, %5                 ; words 2-3 -> scratch GPR
    mov   %3, %6w                 ; store word 2
    shr   %6, 16                  ; expose word 3
    mov   %4, %6w                 ; store word 3
%endmacro
| 53 | 53 |
; Same contract as STORE_4_WORDS_MMX, but uses the SSE4.1
; memory-destination form of pextrw, avoiding the GPR round-trip.
; %1-%4: four word-sized memory destinations
; %5:    xmm source register
; %6:    index of the first word to extract (words %6+0 .. %6+3)
%macro STORE_4_WORDS_SSE4 6
    pextrw %1, %5, %6+0
    pextrw %2, %5, %6+1
    pextrw %3, %5, %6+2
    pextrw %4, %5, %6+3
%endmacro
| (... 22 unchanged lines omitted ...) | (... 22 unchanged lines omitted ...) |
| 81 mova m6, m4 | 81 mova m6, m4 |
| 82 pminsw m3, m2 | 82 pminsw m3, m2 |
| 83 pcmpgtw m6, m3 ; if (a2 < a0 || a1 < a0) | 83 pcmpgtw m6, m3 ; if (a2 < a0 || a1 < a0) |
| 84 psubw m3, m4 | 84 psubw m3, m4 |
| 85 pmullw m3, [pw_5] ; 5*(a3 - a0) | 85 pmullw m3, [pw_5] ; 5*(a3 - a0) |
| 86 PABSW m2, m3 | 86 PABSW m2, m3 |
| 87 psraw m2, 3 ; abs(d/8) | 87 psraw m2, 3 ; abs(d/8) |
| 88 pxor m7, m3 ; d_sign ^= a0_sign | 88 pxor m7, m3 ; d_sign ^= a0_sign |
| 89 | 89 |
| 90 pxor m5, m5 | 90 pxor m5, m5 |
| 91 movd m3, r2 | 91 movd m3, r2d |
| 92 %if %1 > 4 | 92 %if %1 > 4 |
| 93 punpcklbw m3, m3 | 93 punpcklbw m3, m3 |
| 94 %endif | 94 %endif |
| 95 punpcklbw m3, m5 | 95 punpcklbw m3, m5 |
| 96 pcmpgtw m3, m4 ; if (a0 < pq) | 96 pcmpgtw m3, m4 ; if (a0 < pq) |
| 97 pand m6, m3 | 97 pand m6, m3 |
| 98 | 98 |
| 99 mova m3, m0 | 99 mova m3, m0 |
| 100 psubw m3, m1 | 100 psubw m3, m1 |
| 101 PABSW m4, m3 | 101 PABSW m4, m3 |
| (... 219 unchanged lines omitted ...) | (... 219 unchanged lines omitted ...) |
; void ff_vc1_h_loop_filter8_ssse3(uint8_t *src, int stride, int pq)
; 3 GPR args, 6 GPRs total, 8 xmm registers used.
; r5 is the extra scratch GPR handed to the filter macro (the SSE4 variant
; below needs none, hence its smaller register count).
cglobal vc1_h_loop_filter8_ssse3, 3,6,8
    START_H_FILTER 8
    VC1_H_LOOP_FILTER 8, r5
    RET
| 325 | 325 |
; void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq)
; 3 GPR args, 5 GPRs total, 8 xmm registers used; no GPR scratch arg is
; passed to VC1_H_LOOP_FILTER because SSE4.1 pextrw stores straight to
; memory (see STORE_4_WORDS_SSE4).
cglobal vc1_h_loop_filter8_sse4, 3,5,8
    START_H_FILTER 8
    VC1_H_LOOP_FILTER 8
    RET
| OLD | NEW |