| OLD | NEW |
| 1 /* | 1 /* |
| 2 * This is optimized for sh, which have post increment addressing (*p++). | 2 * This is optimized for SH, which has post-increment addressing (*p++). |
| 3 * Some CPU may be index (p[n]) faster than post increment (*p++). | 3 * Some CPUs may be faster with indexed addressing (p[n]) than with post-increment (*p++). |
| 4 * | 4 * |
| 5 * copyright (c) 2001-2003 BERO <bero@geocities.co.jp> | 5 * copyright (c) 2001-2003 BERO <bero@geocities.co.jp> |
| 6 * | 6 * |
| 7 * This file is part of FFmpeg. | 7 * This file is part of FFmpeg. |
| 8 * | 8 * |
| 9 * FFmpeg is free software; you can redistribute it and/or | 9 * FFmpeg is free software; you can redistribute it and/or |
| 10 * modify it under the terms of the GNU Lesser General Public | 10 * modify it under the terms of the GNU Lesser General Public |
| 11 * License as published by the Free Software Foundation; either | 11 * License as published by the Free Software Foundation; either |
| 12 * version 2.1 of the License, or (at your option) any later version. | 12 * version 2.1 of the License, or (at your option) any later version. |
| 13 * | 13 * |
| 14 * FFmpeg is distributed in the hope that it will be useful, | 14 * FFmpeg is distributed in the hope that it will be useful, |
| 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 17 * Lesser General Public License for more details. | 17 * Lesser General Public License for more details. |
| 18 * | 18 * |
| 19 * You should have received a copy of the GNU Lesser General Public | 19 * You should have received a copy of the GNU Lesser General Public |
| 20 * License along with FFmpeg; if not, write to the Free Software | 20 * License along with FFmpeg; if not, write to the Free Software |
| 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 22 */ | 22 */ |
| 23 | 23 |
| 24 #define PIXOP2(OPNAME, OP) \ | 24 #define PIXOP2(OPNAME, OP) \ |
| 25 \ | 25 /* PIXOP2 expands to a family of static inline averaging helpers whose names start with OPNAME; OP(d, v) is the caller-supplied store applied to each destination word. LP, LPC, AV_RN32, UNPACK and the avg/PACK helpers are defined outside this excerpt. */ \ |
| 26 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr
c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h
) \ | 26 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr
c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h
) \ |
| 27 {\ | 27 { /* Per row: OP(one dst word, rnd_avg32(src1 word, src2 word)); both sources are read through LPC (presumably an aligned const load - TODO confirm). Each pointer then advances by its own stride. */\ |
| 28 do {\ | 28 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 29 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ | 29 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ |
| 30 src1+=src_stride1; \ | 30 src1+=src_stride1; \ |
| 31 src2+=src_stride2; \ | 31 src2+=src_stride2; \ |
| 32 dst+=dst_stride; \ | 32 dst+=dst_stride; \ |
| 33 } while(--h); \ | 33 } while(--h); \ |
| 34 }\ | 34 }\ |
| 35 \ | 35 \ |
| 36 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ | 36 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ |
| 37 {\ | 37 { /* Per row: OP(one dst word, rnd_avg32(src1 word, src2 word)). src1 is read with AV_RN32 (presumably because it may be unaligned - TODO confirm); src2 is read through LPC. */\ |
| 38 do {\ | 38 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 39 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ | 39 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ |
| 40 src1+=src_stride1; \ | 40 src1+=src_stride1; \ |
| 41 src2+=src_stride2; \ | 41 src2+=src_stride2; \ |
| 42 dst+=dst_stride; \ | 42 dst+=dst_stride; \ |
| 43 } while(--h); \ | 43 } while(--h); \ |
| 44 }\ | 44 }\ |
| 45 \ | 45 \ |
| 46 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui
nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid
e2, int h) \ | 46 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui
nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid
e2, int h) \ |
| 47 {\ | 47 { /* Per row: four words at byte offsets 0, 4, 8, 12; each is OP(dst word, no_rnd_avg32(src1 word, src2 word)). src1 is read with AV_RN32 (presumably unaligned - TODO confirm); src2 is read through LPC. */\ |
| 48 do {\ | 48 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 49 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ | 49 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ |
| 50 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ | 50 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ |
| 51 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \ | 51 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \ |
| 52 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \ | 52 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \ |
| 53 src1+=src_stride1; \ | 53 src1+=src_stride1; \ |
| 54 src2+=src_stride2; \ | 54 src2+=src_stride2; \ |
| 55 dst+=dst_stride; \ | 55 dst+=dst_stride; \ |
| 56 } while(--h); \ | 56 } while(--h); \ |
| 57 }\ | 57 }\ |
| 58 \ | 58 \ |
| 59 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *
src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ | 59 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *
src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ |
| 60 {\ | 60 { /* Per row: four words at byte offsets 0, 4, 8, 12; each is OP(dst word, rnd_avg32(src1 word, src2 word)). src1 is read with AV_RN32 (presumably unaligned - TODO confirm); src2 is read through LPC. */\ |
| 61 do {\ | 61 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 62 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ | 62 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ |
| 63 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ | 63 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ |
| 64 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \ | 64 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \ |
| 65 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \ | 65 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \ |
| 66 src1+=src_stride1; \ | 66 src1+=src_stride1; \ |
| 67 src2+=src_stride2; \ | 67 src2+=src_stride2; \ |
| 68 dst+=dst_stride; \ | 68 dst+=dst_stride; \ |
| 69 } while(--h); \ | 69 } while(--h); \ |
| 70 }\ | 70 }\ |
| 71 \ | 71 \ |
| 72 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin
t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride
2, int h) \ | 72 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin
t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride
2, int h) \ |
| 73 {\ | 73 { /* Per row: two words at byte offsets 0 and 4; each is OP(dst word, no_rnd_avg32(src1 word, src2 word)). src1 is read with AV_RN32, src2 through LPC. */\ |
| 74 do { /* onlye src2 aligned */\ | 74 do { /* only src2 is aligned; the body executes before --h is tested, so h must be >= 1 */\ |
| 75 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ | 75 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ |
| 76 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ | 76 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ |
| 77 src1+=src_stride1; \ | 77 src1+=src_stride1; \ |
| 78 src2+=src_stride2; \ | 78 src2+=src_stride2; \ |
| 79 dst+=dst_stride; \ | 79 dst+=dst_stride; \ |
| 80 } while(--h); \ | 80 } while(--h); \ |
| 81 }\ | 81 }\ |
| 82 \ | 82 \ |
| 83 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ | 83 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ |
| 84 {\ | 84 { /* Per row: two words at byte offsets 0 and 4; each is OP(dst word, rnd_avg32(src1 word, src2 word)). src1 is read with AV_RN32 (presumably unaligned - TODO confirm); src2 is read through LPC. */\ |
| 85 do {\ | 85 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 86 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ | 86 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ |
| 87 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ | 87 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ |
| 88 src1+=src_stride1; \ | 88 src1+=src_stride1; \ |
| 89 src2+=src_stride2; \ | 89 src2+=src_stride2; \ |
| 90 dst+=dst_stride; \ | 90 dst+=dst_stride; \ |
| 91 } while(--h); \ | 91 } while(--h); \ |
| 92 }\ | 92 }\ |
| 93 \ | 93 \ |
| 94 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint
8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2
, int h) \ | 94 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint
8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2
, int h) \ |
| 95 {\ | 95 { /* Per row: two words at byte offsets 0 and 4; each is OP(dst word, no_rnd_avg32(src1 word, src2 word)). Both sources are read through LPC (presumably an aligned const load - TODO confirm). */\ |
| 96 do {\ | 96 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 97 OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ | 97 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ |
| 98 OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ | 98 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ |
| 99 src1+=src_stride1; \ | 99 src1+=src_stride1; \ |
| 100 src2+=src_stride2; \ | 100 src2+=src_stride2; \ |
| 101 dst+=dst_stride; \ | 101 dst+=dst_stride; \ |
| 102 } while(--h); \ | 102 } while(--h); \ |
| 103 }\ | 103 }\ |
| 104 \ | 104 \ |
| 105 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *sr
c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h
) \ | 105 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *sr
c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h
) \ |
| 106 {\ | 106 { /* Per row: two words at byte offsets 0 and 4; each is OP(dst word, rnd_avg32(src1 word, src2 word)). Both sources are read through LPC (presumably an aligned const load - TODO confirm). */\ |
| 107 do {\ | 107 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 108 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ | 108 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ |
| 109 OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ | 109 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ |
| 110 src1+=src_stride1; \ | 110 src1+=src_stride1; \ |
| 111 src2+=src_stride2; \ | 111 src2+=src_stride2; \ |
| 112 dst+=dst_stride; \ | 112 dst+=dst_stride; \ |
| 113 } while(--h); \ | 113 } while(--h); \ |
| 114 }\ | 114 }\ |
| 115 \ | 115 \ |
| 116 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uin
t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride
2, int h) \ | 116 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uin
t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride
2, int h) \ |
| 117 {\ | 117 { /* Per row: four words at byte offsets 0, 4, 8, 12; each is OP(dst word, no_rnd_avg32(src1 word, src2 word)). Both sources are read through LPC (presumably an aligned const load - TODO confirm). */\ |
| 118 do {\ | 118 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 119 OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ | 119 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ |
| 120 OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ | 120 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ |
| 121 OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \ | 121 OP(LP(dst+8),no_rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \ |
| 122 OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \ | 122 OP(LP(dst+12),no_rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \ |
| 123 src1+=src_stride1; \ | 123 src1+=src_stride1; \ |
| 124 src2+=src_stride2; \ | 124 src2+=src_stride2; \ |
| 125 dst+=dst_stride; \ | 125 dst+=dst_stride; \ |
| 126 } while(--h); \ | 126 } while(--h); \ |
| 127 }\ | 127 }\ |
| 128 \ | 128 \ |
| 129 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ | 129 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ |
| 130 {\ | 130 { /* Per row: four words at byte offsets 0, 4, 8, 12; each is OP(dst word, rnd_avg32(src1 word, src2 word)). Both sources are read through LPC (presumably an aligned const load - TODO confirm). */\ |
| 131 do {\ | 131 do { /* the body executes before --h is tested, so h must be >= 1 */\ |
| 132 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ | 132 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ |
| 133 OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ | 133 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ |
| 134 OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \ | 134 OP(LP(dst+8),rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \ |
| 135 OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \ | 135 OP(LP(dst+12),rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \ |
| 136 src1+=src_stride1; \ | 136 src1+=src_stride1; \ |
| 137 src2+=src_stride2; \ | 137 src2+=src_stride2; \ |
| 138 dst+=dst_stride; \ | 138 dst+=dst_stride; \ |
| 139 } while(--h); \ | 139 } while(--h); \ |
| 140 }\ | 140 }\ |
| 141 \ | 141 \ |
| 142 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const ui
nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid
e2, int h) \ | 142 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const ui
nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid
e2, int h) \ |
| 143 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,sr
c_stride1,h); } \ | 143 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,sr
c_stride1,h); } /* forwards to the _aligned2 variant with src1/src2 and their strides swapped, so the src2 passed here becomes the operand that _aligned2 reads with AV_RN32 */ \ |
| 144 \ | 144 \ |
| 145 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *
src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ | 145 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *
src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ |
| 146 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_strid
e1,h); } \ | 146 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_strid
e1,h); } /* forwards to the _aligned2 variant with src1/src2 and their strides swapped, so the src2 passed here becomes the operand that _aligned2 reads with AV_RN32 */ \ |
| 147 \ | 147 \ |
| 148 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uin
t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride
2, int h) \ | 148 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uin
t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride
2, int h) \ |
| 149 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src
_stride1,h); } \ | 149 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src
_stride1,h); } /* forwards to the _aligned2 variant with src1/src2 and their strides swapped, so the src2 passed here becomes the operand that _aligned2 reads with AV_RN32 */ \ |
| 150 \ | 150 \ |
| 151 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ | 151 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *s
rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int
h) \ |
| 152 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride
1,h); } \ | 152 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride
1,h); } /* forwards to the _aligned2 variant with src1/src2 and their strides swapped, so the src2 passed here becomes the operand that _aligned2 reads with AV_RN32 */ \ |
| 153 \ | 153 \ |
| 154 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *sr
c1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1,
int src_stride2,int src_stride3,int src_stride4, int h){\ | 154 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *sr
c1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1,
int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 155 do { \ | 155 do { /* Per row: two dst words (offsets 0 and 4), each OP-stored from rnd_PACK of the four source words unpacked pairwise by UNPACK; all sources are read through LPC. UNPACK and rnd_PACK are defined outside this excerpt (presumably a rounded four-way average - TODO confirm). The body executes before --h is tested, so h must be >= 1. */ \ |
| 156 uint32_t a0,a1,a2,a3; \ | 156 uint32_t a0,a1,a2,a3; \ |
| 157 UNPACK(a0,a1,LP(src1),LP(src2)); \ | 157 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ |
| 158 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 158 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 159 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ | 159 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ |
| 160 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ | 160 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ |
| 161 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 161 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 162 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ | 162 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ |
| 163 src1+=src_stride1;\ | 163 src1+=src_stride1;\ |
| 164 src2+=src_stride2;\ | 164 src2+=src_stride2;\ |
| 165 src3+=src_stride3;\ | 165 src3+=src_stride3;\ |
| 166 src4+=src_stride4;\ | 166 src4+=src_stride4;\ |
| 167 dst+=dst_stride;\ | 167 dst+=dst_stride;\ |
| 168 } while(--h); \ | 168 } while(--h); \ |
| 169 } \ | 169 } \ |
| 170 \ | 170 \ |
| 171 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint
8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_s
tride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 171 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint
8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_s
tride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 172 do { \ | 172 do { /* Per row: two dst words (offsets 0 and 4), each OP-stored from no_rnd_PACK of the four source words unpacked pairwise by UNPACK; all sources are read through LPC. UNPACK and no_rnd_PACK are defined outside this excerpt (presumably a non-rounding four-way average - TODO confirm). The body executes before --h is tested, so h must be >= 1. */ \ |
| 173 uint32_t a0,a1,a2,a3; \ | 173 uint32_t a0,a1,a2,a3; \ |
| 174 UNPACK(a0,a1,LP(src1),LP(src2)); \ | 174 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ |
| 175 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 175 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 176 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ | 176 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 177 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ | 177 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ |
| 178 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 178 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 179 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ | 179 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 180 src1+=src_stride1;\ | 180 src1+=src_stride1;\ |
| 181 src2+=src_stride2;\ | 181 src2+=src_stride2;\ |
| 182 src3+=src_stride3;\ | 182 src3+=src_stride3;\ |
| 183 src4+=src_stride4;\ | 183 src4+=src_stride4;\ |
| 184 dst+=dst_stride;\ | 184 dst+=dst_stride;\ |
| 185 } while(--h); \ | 185 } while(--h); \ |
| 186 } \ | 186 } \ |
| 187 \ | 187 \ |
| 188 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *s
rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1
, int src_stride2,int src_stride3,int src_stride4, int h){\ | 188 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *s
rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1
, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 189 do { \ | 189 do { /* Per row: two dst words (offsets 0 and 4), each OP-stored from rnd_PACK of the four source words unpacked pairwise by UNPACK (both macros defined outside this excerpt). The body executes before --h is tested, so h must be >= 1. */ \ |
| 190 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ | 190 uint32_t a0,a1,a2,a3; /* only src1 is not aligned, so it is read with AV_RN32; src2..src4 are read through LPC */\ |
| 191 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ | 191 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ |
| 192 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 192 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 193 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ | 193 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ |
| 194 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ | 194 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ |
| 195 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 195 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 196 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ | 196 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ |
| 197 src1+=src_stride1;\ | 197 src1+=src_stride1;\ |
| 198 src2+=src_stride2;\ | 198 src2+=src_stride2;\ |
| 199 src3+=src_stride3;\ | 199 src3+=src_stride3;\ |
| 200 src4+=src_stride4;\ | 200 src4+=src_stride4;\ |
| 201 dst+=dst_stride;\ | 201 dst+=dst_stride;\ |
| 202 } while(--h); \ | 202 } while(--h); \ |
| 203 } \ | 203 } \ |
| 204 \ | 204 \ |
| 205 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uin
t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_
stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 205 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uin
t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_
stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 206 do { \ | 206 do { /* Per row: two dst words (offsets 0 and 4), each OP-stored from no_rnd_PACK of the four source words unpacked pairwise by UNPACK (both macros defined outside this excerpt). src1 is read with AV_RN32 (presumably unaligned - TODO confirm); src2..src4 are read through LPC. The body executes before --h is tested, so h must be >= 1. */ \ |
| 207 uint32_t a0,a1,a2,a3; \ | 207 uint32_t a0,a1,a2,a3; \ |
| 208 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ | 208 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ |
| 209 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 209 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 210 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ | 210 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 211 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ | 211 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ |
| 212 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 212 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 213 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ | 213 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 214 src1+=src_stride1;\ | 214 src1+=src_stride1;\ |
| 215 src2+=src_stride2;\ | 215 src2+=src_stride2;\ |
| 216 src3+=src_stride3;\ | 216 src3+=src_stride3;\ |
| 217 src4+=src_stride4;\ | 217 src4+=src_stride4;\ |
| 218 dst+=dst_stride;\ | 218 dst+=dst_stride;\ |
| 219 } while(--h); \ | 219 } while(--h); \ |
| 220 } \ | 220 } \ |
| 221 \ | 221 \ |
| 222 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *s
rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1
, int src_stride2,int src_stride3,int src_stride4, int h){\ | 222 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *s
rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1
, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 223 do { \ | 223 do { \ |
| 224 uint32_t a0,a1,a2,a3; \ | 224 uint32_t a0,a1,a2,a3; \ |
| 225 UNPACK(a0,a1,LP(src1),LP(src2)); \ | 225 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ |
| 226 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 226 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 227 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ | 227 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ |
| 228 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ | 228 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ |
| 229 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 229 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 230 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ | 230 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ |
| 231 UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ | 231 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \ |
| 232 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ | 232 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ |
| 233 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ | 233 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ |
| 234 UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ | 234 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \ |
| 235 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ | 235 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ |
| 236 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ | 236 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ |
| 237 src1+=src_stride1;\ | 237 src1+=src_stride1;\ |
| 238 src2+=src_stride2;\ | 238 src2+=src_stride2;\ |
| 239 src3+=src_stride3;\ | 239 src3+=src_stride3;\ |
| 240 src4+=src_stride4;\ | 240 src4+=src_stride4;\ |
| 241 dst+=dst_stride;\ | 241 dst+=dst_stride;\ |
| 242 } while(--h); \ | 242 } while(--h); \ |
| 243 } \ | 243 } \ |
| 244 \ | 244 \ |
| 245 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uin
t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_
stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 245 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uin
t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_
stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 246 do { \ | 246 do { /* Per row: four dst words (offsets 0, 4, 8, 12), each OP-stored from no_rnd_PACK of the four source words at the same offset, unpacked pairwise by UNPACK; all sources are read through LPC. UNPACK and no_rnd_PACK are defined outside this excerpt (presumably a non-rounding four-way average - TODO confirm). The body executes before --h is tested, so h must be >= 1. */ \ |
| 247 uint32_t a0,a1,a2,a3; \ | 247 uint32_t a0,a1,a2,a3; \ |
| 248 UNPACK(a0,a1,LP(src1),LP(src2)); \ | 248 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ |
| 249 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 249 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 250 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ | 250 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 251 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ | 251 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ |
| 252 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 252 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 253 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ | 253 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 254 UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ | 254 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \ |
| 255 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ | 255 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ |
| 256 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ | 256 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 257 UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ | 257 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \ |
| 258 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ | 258 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ |
| 259 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ | 259 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 260 src1+=src_stride1;\ | 260 src1+=src_stride1;\ |
| 261 src2+=src_stride2;\ | 261 src2+=src_stride2;\ |
| 262 src3+=src_stride3;\ | 262 src3+=src_stride3;\ |
| 263 src4+=src_stride4;\ | 263 src4+=src_stride4;\ |
| 264 dst+=dst_stride;\ | 264 dst+=dst_stride;\ |
| 265 } while(--h); \ | 265 } while(--h); \ |
| 266 } \ | 266 } \ |
| 267 \ | 267 \ |
| 268 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *
src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride
1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 268 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *
src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride
1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 269 do { /* src1 is unaligned */\ | 269 do { /* src1 is unaligned, so it is read with AV_RN32; src2..src4 are read through LPC. Per row: four dst words (offsets 0, 4, 8, 12), each OP-stored from rnd_PACK of the four source words at the same offset (UNPACK and rnd_PACK are defined outside this excerpt). The body executes before --h is tested, so h must be >= 1. */\ |
| 270 uint32_t a0,a1,a2,a3; \ | 270 uint32_t a0,a1,a2,a3; \ |
| 271 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ | 271 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ |
| 272 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 272 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 273 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ | 273 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ |
| 274 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ | 274 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ |
| 275 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 275 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 276 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ | 276 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); /* the word built from the src+4 inputs belongs at dst+4, as in the no_rnd variant; storing it at dst+8 leaves bytes 4..7 unwritten */ \ |
| 277 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \ | 277 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \ |
| 278 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ | 278 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ |
| 279 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ | 279 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ |
| 280 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \ | 280 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \ |
| 281 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ | 281 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ |
| 282 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ | 282 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ |
| 283 src1+=src_stride1;\ | 283 src1+=src_stride1;\ |
| 284 src2+=src_stride2;\ | 284 src2+=src_stride2;\ |
| 285 src3+=src_stride3;\ | 285 src3+=src_stride3;\ |
| 286 src4+=src_stride4;\ | 286 src4+=src_stride4;\ |
| 287 dst+=dst_stride;\ | 287 dst+=dst_stride;\ |
| 288 } while(--h); \ | 288 } while(--h); \ |
| 289 } \ | 289 } \ |
| 290 \ | 290 \ |
| 291 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const ui
nt8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src
_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 291 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const ui
nt8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src
_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
| 292 do { \ | 292 do { /* Per row: four dst words (offsets 0, 4, 8, 12), each OP-stored from no_rnd_PACK of the four source words at the same offset, unpacked pairwise by UNPACK (both macros defined outside this excerpt). src1 is read with AV_RN32 (presumably unaligned - TODO confirm); src2..src4 are read through LPC. The body executes before --h is tested, so h must be >= 1. */ \ |
| 293 uint32_t a0,a1,a2,a3; \ | 293 uint32_t a0,a1,a2,a3; \ |
| 294 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ | 294 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ |
| 295 UNPACK(a2,a3,LP(src3),LP(src4)); \ | 295 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ |
| 296 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ | 296 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 297 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ | 297 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ |
| 298 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ | 298 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ |
| 299 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ | 299 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 300 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \ | 300 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \ |
| 301 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ | 301 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ |
| 302 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ | 302 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 303 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \ | 303 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \ |
| 304 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ | 304 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ |
| 305 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ | 305 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ |
| 306 src1+=src_stride1;\ | 306 src1+=src_stride1;\ |
| 307 src2+=src_stride2;\ | 307 src2+=src_stride2;\ |
| 308 src3+=src_stride3;\ | 308 src3+=src_stride3;\ |
| 309 src4+=src_stride4;\ | 309 src4+=src_stride4;\ |
| 310 dst+=dst_stride;\ | 310 dst+=dst_stride;\ |
| 311 } while(--h); \ | 311 } while(--h); \ |
| 312 } \ | 312 } \ |
| 313 \ | 313 \ |
| 314 | 314 |
| (...skipping 1085 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1400 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | 1400 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); |
| 1401 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); | 1401 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); |
| 1402 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | 1402 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); |
| 1403 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8); | 1403 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8); |
| 1404 } | 1404 } |
| 1405 static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){ | 1405 static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){ /* runs wmv2_mspel8_h_lowpass into a temporary buffer, then wmv2_mspel8_v_lowpass from that buffer into dst; both helpers are defined outside this excerpt */ |
| 1406 uint8_t halfH[88]; | 1406 uint8_t halfH[88]; /* 88 = 11 * 8, matching the 8 and 11 passed below (presumably 11 rows of 8 bytes - TODO confirm against wmv2_mspel8_h_lowpass) */ |
| 1407 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | 1407 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); /* the input pointer starts one stride before src */ |
| 1408 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); | 1408 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); /* the input pointer skips the first 8 bytes of halfH */ |
| 1409 } | 1409 } |
| OLD | NEW |