OLD | NEW |
1 /* | 1 /* |
2 * MMX and SSE2 optimized snow DSP utils | 2 * MMX and SSE2 optimized snow DSP utils |
3 * Copyright (c) 2005-2006 Robert Edele <yartrebo@earthlink.net> | 3 * Copyright (c) 2005-2006 Robert Edele <yartrebo@earthlink.net> |
4 * | 4 * |
5 * This file is part of FFmpeg. | 5 * This file is part of FFmpeg. |
6 * | 6 * |
7 * FFmpeg is free software; you can redistribute it and/or | 7 * FFmpeg is free software; you can redistribute it and/or |
8 * modify it under the terms of the GNU Lesser General Public | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | 9 * License as published by the Free Software Foundation; either |
10 * version 2.1 of the License, or (at your option) any later version. | 10 * version 2.1 of the License, or (at your option) any later version. |
11 * | 11 * |
12 * FFmpeg is distributed in the hope that it will be useful, | 12 * FFmpeg is distributed in the hope that it will be useful, |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 * Lesser General Public License for more details. | 15 * Lesser General Public License for more details. |
16 * | 16 * |
17 * You should have received a copy of the GNU Lesser General Public | 17 * You should have received a copy of the GNU Lesser General Public |
18 * License along with FFmpeg; if not, write to the Free Software | 18 * License along with FFmpeg; if not, write to the Free Software |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 */ | 20 */ |
21 | 21 |
22 #include "libavutil/x86_cpu.h" | 22 #include "libavutil/x86_cpu.h" |
23 #include "libavcodec/avcodec.h" | 23 #include "libavcodec/avcodec.h" |
24 #include "libavcodec/snow.h" | 24 #include "libavcodec/snow.h" |
| 25 #include "dsputil_mmx.h" |
25 | 26 |
26 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | 27 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ |
27 const int w2= (width+1)>>1; | 28 const int w2= (width+1)>>1; |
28 DECLARE_ALIGNED_16(IDWTELEM, temp)[width>>1]; | 29 DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1]; |
29 const int w_l= (width>>1); | 30 const int w_l= (width>>1); |
30 const int w_r= w2 - 1; | 31 const int w_r= w2 - 1; |
31 int i; | 32 int i; |
32 | 33 |
33 { // Lift 0 | 34 { // Lift 0 |
34 IDWTELEM * const ref = b + w2 - 1; | 35 IDWTELEM * const ref = b + w2 - 1; |
35 IDWTELEM b_0 = b[0]; //By allowing the first entry in b[0] to be calcula
ted twice | 36 IDWTELEM b_0 = b[0]; //By allowing the first entry in b[0] to be calcula
ted twice |
36 // (the first time erroneously), we allow the SSE2 code to run an extra
pass. | 37 // (the first time erroneously), we allow the SSE2 code to run an extra
pass. |
37 // The savings in code and time are well worth having to store this valu
e and | 38 // The savings in code and time are well worth having to store this valu
e and |
38 // calculate b[0] correctly afterwards. | 39 // calculate b[0] correctly afterwards. |
(...skipping 823 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
862 | 863 |
863 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui
nt8_t * * block, int b_w, int b_h, | 864 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui
nt8_t * * block, int b_w, int b_h, |
864 int src_x, int src_y, int src_stride, slice_buffer * s
b, int add, uint8_t * dst8){ | 865 int src_x, int src_y, int src_stride, slice_buffer * s
b, int add, uint8_t * dst8){ |
865 if (b_w == 16) | 866 if (b_w == 16) |
866 inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, s
rc_x,src_y, src_stride, sb, add, dst8); | 867 inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, s
rc_x,src_y, src_stride, sb, add, dst8); |
867 else if (b_w == 8 && obmc_stride == 16) | 868 else if (b_w == 8 && obmc_stride == 16) |
868 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, sr
c_x,src_y, src_stride, sb, add, dst8); | 869 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, sr
c_x,src_y, src_stride, sb, add, dst8); |
869 else | 870 else |
870 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y
, src_stride, sb, add, dst8); | 871 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y
, src_stride, sb, add, dst8); |
871 } | 872 } |
OLD | NEW |