Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Side by Side Diff: patched-ffmpeg-mt/libavcodec/sh4/qpel.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: '' Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * This is optimized for sh, which have post increment addressing (*p++). 2 * This is optimized for sh, which have post increment addressing (*p++).
3 * Some CPU may be index (p[n]) faster than post increment (*p++). 3 * Some CPU may be index (p[n]) faster than post increment (*p++).
4 * 4 *
5 * copyright (c) 2001-2003 BERO <bero@geocities.co.jp> 5 * copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
6 * 6 *
7 * This file is part of FFmpeg. 7 * This file is part of FFmpeg.
8 * 8 *
9 * FFmpeg is free software; you can redistribute it and/or 9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public 10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either 11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version. 12 * version 2.1 of the License, or (at your option) any later version.
13 * 13 *
14 * FFmpeg is distributed in the hope that it will be useful, 14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details. 17 * Lesser General Public License for more details.
18 * 18 *
19 * You should have received a copy of the GNU Lesser General Public 19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software 20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */ 22 */
23 23
24 #define PIXOP2(OPNAME, OP) \ 24 #define PIXOP2(OPNAME, OP) \
25 \ 25 \
26 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h ) \ 26 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h ) \
27 {\ 27 {\
28 do {\ 28 do {\
29 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ 29 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
30 src1+=src_stride1; \ 30 src1+=src_stride1; \
31 src2+=src_stride2; \ 31 src2+=src_stride2; \
32 dst+=dst_stride; \ 32 dst+=dst_stride; \
33 } while(--h); \ 33 } while(--h); \
34 }\ 34 }\
35 \ 35 \
36 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 36 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
37 {\ 37 {\
38 do {\ 38 do {\
39 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ 39 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
40 src1+=src_stride1; \ 40 src1+=src_stride1; \
41 src2+=src_stride2; \ 41 src2+=src_stride2; \
42 dst+=dst_stride; \ 42 dst+=dst_stride; \
43 } while(--h); \ 43 } while(--h); \
44 }\ 44 }\
45 \ 45 \
46 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid e2, int h) \ 46 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid e2, int h) \
47 {\ 47 {\
48 do {\ 48 do {\
49 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ 49 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
50 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ 50 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
51 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \ 51 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
52 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \ 52 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
53 src1+=src_stride1; \ 53 src1+=src_stride1; \
54 src2+=src_stride2; \ 54 src2+=src_stride2; \
55 dst+=dst_stride; \ 55 dst+=dst_stride; \
56 } while(--h); \ 56 } while(--h); \
57 }\ 57 }\
58 \ 58 \
59 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t * src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 59 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t * src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
60 {\ 60 {\
61 do {\ 61 do {\
62 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ 62 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
63 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ 63 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
64 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \ 64 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
65 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \ 65 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
66 src1+=src_stride1; \ 66 src1+=src_stride1; \
67 src2+=src_stride2; \ 67 src2+=src_stride2; \
68 dst+=dst_stride; \ 68 dst+=dst_stride; \
69 } while(--h); \ 69 } while(--h); \
70 }\ 70 }\
71 \ 71 \
72 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride 2, int h) \ 72 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride 2, int h) \
73 {\ 73 {\
74 do { /* only src2 aligned */\ 74 do { /* only src2 aligned */\
75 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ 75 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
76 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ 76 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
77 src1+=src_stride1; \ 77 src1+=src_stride1; \
78 src2+=src_stride2; \ 78 src2+=src_stride2; \
79 dst+=dst_stride; \ 79 dst+=dst_stride; \
80 } while(--h); \ 80 } while(--h); \
81 }\ 81 }\
82 \ 82 \
83 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 83 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
84 {\ 84 {\
85 do {\ 85 do {\
86 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \ 86 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
87 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \ 87 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
88 src1+=src_stride1; \ 88 src1+=src_stride1; \
89 src2+=src_stride2; \ 89 src2+=src_stride2; \
90 dst+=dst_stride; \ 90 dst+=dst_stride; \
91 } while(--h); \ 91 } while(--h); \
92 }\ 92 }\
93 \ 93 \
94 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint 8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2 , int h) \ 94 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint 8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2 , int h) \
95 {\ 95 {\
96 do {\ 96 do {\
97 OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ 97 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
98 OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ 98 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
99 src1+=src_stride1; \ 99 src1+=src_stride1; \
100 src2+=src_stride2; \ 100 src2+=src_stride2; \
101 dst+=dst_stride; \ 101 dst+=dst_stride; \
102 } while(--h); \ 102 } while(--h); \
103 }\ 103 }\
104 \ 104 \
105 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *sr c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h ) \ 105 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *sr c1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h ) \
106 {\ 106 {\
107 do {\ 107 do {\
108 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ 108 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
109 OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ 109 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
110 src1+=src_stride1; \ 110 src1+=src_stride1; \
111 src2+=src_stride2; \ 111 src2+=src_stride2; \
112 dst+=dst_stride; \ 112 dst+=dst_stride; \
113 } while(--h); \ 113 } while(--h); \
114 }\ 114 }\
115 \ 115 \
116 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uin t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride 2, int h) \ 116 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uin t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride 2, int h) \
117 {\ 117 {\
118 do {\ 118 do {\
119 OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ 119 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
120 OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ 120 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
121 OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \ 121 OP(LP(dst+8),no_rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
122 OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \ 122 OP(LP(dst+12),no_rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
123 src1+=src_stride1; \ 123 src1+=src_stride1; \
124 src2+=src_stride2; \ 124 src2+=src_stride2; \
125 dst+=dst_stride; \ 125 dst+=dst_stride; \
126 } while(--h); \ 126 } while(--h); \
127 }\ 127 }\
128 \ 128 \
129 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 129 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
130 {\ 130 {\
131 do {\ 131 do {\
132 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ 132 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
133 OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ 133 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
134 OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \ 134 OP(LP(dst+8),rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
135 OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \ 135 OP(LP(dst+12),rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
136 src1+=src_stride1; \ 136 src1+=src_stride1; \
137 src2+=src_stride2; \ 137 src2+=src_stride2; \
138 dst+=dst_stride; \ 138 dst+=dst_stride; \
139 } while(--h); \ 139 } while(--h); \
140 }\ 140 }\
141 \ 141 \
142 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const ui nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid e2, int h) \ 142 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const ui nt8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_strid e2, int h) \
143 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,sr c_stride1,h); } \ 143 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,sr c_stride1,h); } \
144 \ 144 \
145 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t * src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 145 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t * src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
146 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_strid e1,h); } \ 146 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_strid e1,h); } \
147 \ 147 \
148 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uin t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride 2, int h) \ 148 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uin t8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride 2, int h) \
149 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src _stride1,h); } \ 149 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src _stride1,h); } \
150 \ 150 \
151 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 151 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *s rc1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
152 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride 1,h); } \ 152 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride 1,h); } \
153 \ 153 \
154 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *sr c1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 154 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *sr c1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
155 do { \ 155 do { \
156 uint32_t a0,a1,a2,a3; \ 156 uint32_t a0,a1,a2,a3; \
157 UNPACK(a0,a1,LP(src1),LP(src2)); \ 157 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
158 UNPACK(a2,a3,LP(src3),LP(src4)); \ 158 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
159 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 159 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
160 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ 160 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
161 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 161 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
162 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ 162 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
163 src1+=src_stride1;\ 163 src1+=src_stride1;\
164 src2+=src_stride2;\ 164 src2+=src_stride2;\
165 src3+=src_stride3;\ 165 src3+=src_stride3;\
166 src4+=src_stride4;\ 166 src4+=src_stride4;\
167 dst+=dst_stride;\ 167 dst+=dst_stride;\
168 } while(--h); \ 168 } while(--h); \
169 } \ 169 } \
170 \ 170 \
171 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint 8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_s tride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 171 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint 8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_s tride1, int src_stride2,int src_stride3,int src_stride4, int h){\
172 do { \ 172 do { \
173 uint32_t a0,a1,a2,a3; \ 173 uint32_t a0,a1,a2,a3; \
174 UNPACK(a0,a1,LP(src1),LP(src2)); \ 174 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
175 UNPACK(a2,a3,LP(src3),LP(src4)); \ 175 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
176 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 176 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
177 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ 177 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
178 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 178 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
179 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 179 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
180 src1+=src_stride1;\ 180 src1+=src_stride1;\
181 src2+=src_stride2;\ 181 src2+=src_stride2;\
182 src3+=src_stride3;\ 182 src3+=src_stride3;\
183 src4+=src_stride4;\ 183 src4+=src_stride4;\
184 dst+=dst_stride;\ 184 dst+=dst_stride;\
185 } while(--h); \ 185 } while(--h); \
186 } \ 186 } \
187 \ 187 \
188 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *s rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1 , int src_stride2,int src_stride3,int src_stride4, int h){\ 188 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *s rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1 , int src_stride2,int src_stride3,int src_stride4, int h){\
189 do { \ 189 do { \
190 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ 190 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
191 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ 191 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
192 UNPACK(a2,a3,LP(src3),LP(src4)); \ 192 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
193 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 193 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
194 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ 194 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
195 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 195 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
196 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ 196 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
197 src1+=src_stride1;\ 197 src1+=src_stride1;\
198 src2+=src_stride2;\ 198 src2+=src_stride2;\
199 src3+=src_stride3;\ 199 src3+=src_stride3;\
200 src4+=src_stride4;\ 200 src4+=src_stride4;\
201 dst+=dst_stride;\ 201 dst+=dst_stride;\
202 } while(--h); \ 202 } while(--h); \
203 } \ 203 } \
204 \ 204 \
205 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uin t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_ stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 205 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uin t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_ stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
206 do { \ 206 do { \
207 uint32_t a0,a1,a2,a3; \ 207 uint32_t a0,a1,a2,a3; \
208 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ 208 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
209 UNPACK(a2,a3,LP(src3),LP(src4)); \ 209 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
210 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 210 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
211 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ 211 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
212 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 212 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
213 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 213 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
214 src1+=src_stride1;\ 214 src1+=src_stride1;\
215 src2+=src_stride2;\ 215 src2+=src_stride2;\
216 src3+=src_stride3;\ 216 src3+=src_stride3;\
217 src4+=src_stride4;\ 217 src4+=src_stride4;\
218 dst+=dst_stride;\ 218 dst+=dst_stride;\
219 } while(--h); \ 219 } while(--h); \
220 } \ 220 } \
221 \ 221 \
222 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *s rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1 , int src_stride2,int src_stride3,int src_stride4, int h){\ 222 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *s rc1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1 , int src_stride2,int src_stride3,int src_stride4, int h){\
223 do { \ 223 do { \
224 uint32_t a0,a1,a2,a3; \ 224 uint32_t a0,a1,a2,a3; \
225 UNPACK(a0,a1,LP(src1),LP(src2)); \ 225 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
226 UNPACK(a2,a3,LP(src3),LP(src4)); \ 226 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
227 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 227 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
228 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ 228 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
229 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 229 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
230 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 230 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
231 UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ 231 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
232 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ 232 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
233 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 233 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
234 UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ 234 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
235 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ 235 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
236 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ 236 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
237 src1+=src_stride1;\ 237 src1+=src_stride1;\
238 src2+=src_stride2;\ 238 src2+=src_stride2;\
239 src3+=src_stride3;\ 239 src3+=src_stride3;\
240 src4+=src_stride4;\ 240 src4+=src_stride4;\
241 dst+=dst_stride;\ 241 dst+=dst_stride;\
242 } while(--h); \ 242 } while(--h); \
243 } \ 243 } \
244 \ 244 \
245 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uin t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_ stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 245 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uin t8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_ stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
246 do { \ 246 do { \
247 uint32_t a0,a1,a2,a3; \ 247 uint32_t a0,a1,a2,a3; \
248 UNPACK(a0,a1,LP(src1),LP(src2)); \ 248 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
249 UNPACK(a2,a3,LP(src3),LP(src4)); \ 249 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
250 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 250 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
251 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ 251 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
252 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 252 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
253 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 253 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
254 UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ 254 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
255 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ 255 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
256 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ 256 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
257 UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ 257 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
258 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ 258 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
259 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ 259 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
260 src1+=src_stride1;\ 260 src1+=src_stride1;\
261 src2+=src_stride2;\ 261 src2+=src_stride2;\
262 src3+=src_stride3;\ 262 src3+=src_stride3;\
263 src4+=src_stride4;\ 263 src4+=src_stride4;\
264 dst+=dst_stride;\ 264 dst+=dst_stride;\
265 } while(--h); \ 265 } while(--h); \
266 } \ 266 } \
267 \ 267 \
268 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t * src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride 1, int src_stride2,int src_stride3,int src_stride4, int h){\ 268 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t * src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride 1, int src_stride2,int src_stride3,int src_stride4, int h){\
269 do { /* src1 is unaligned */\ 269 do { /* src1 is unaligned */\
270 uint32_t a0,a1,a2,a3; \ 270 uint32_t a0,a1,a2,a3; \
271 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ 271 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
272 UNPACK(a2,a3,LP(src3),LP(src4)); \ 272 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
273 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 273 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
274 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ 274 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
275 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 275 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
276 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 276 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
277 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \ 277 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
278 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ 278 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
279 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 279 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
280 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \ 280 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
281 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ 281 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
282 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ 282 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
283 src1+=src_stride1;\ 283 src1+=src_stride1;\
284 src2+=src_stride2;\ 284 src2+=src_stride2;\
285 src3+=src_stride3;\ 285 src3+=src_stride3;\
286 src4+=src_stride4;\ 286 src4+=src_stride4;\
287 dst+=dst_stride;\ 287 dst+=dst_stride;\
288 } while(--h); \ 288 } while(--h); \
289 } \ 289 } \
290 \ 290 \
291 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const ui nt8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src _stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 291 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const ui nt8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src _stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
292 do { \ 292 do { \
293 uint32_t a0,a1,a2,a3; \ 293 uint32_t a0,a1,a2,a3; \
294 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \ 294 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
295 UNPACK(a2,a3,LP(src3),LP(src4)); \ 295 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
296 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 296 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
297 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \ 297 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
298 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ 298 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
299 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 299 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
300 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \ 300 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
301 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ 301 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
302 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ 302 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
303 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \ 303 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
304 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ 304 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
305 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ 305 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
306 src1+=src_stride1;\ 306 src1+=src_stride1;\
307 src2+=src_stride2;\ 307 src2+=src_stride2;\
308 src3+=src_stride3;\ 308 src3+=src_stride3;\
309 src4+=src_stride4;\ 309 src4+=src_stride4;\
310 dst+=dst_stride;\ 310 dst+=dst_stride;\
311 } while(--h); \ 311 } while(--h); \
312 } \ 312 } \
313 \ 313 \
314 314
(...skipping 1085 matching lines...) Expand 10 before | Expand all | Expand 10 after
1400 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); 1400 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1401 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); 1401 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1402 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); 1402 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1403 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8); 1403 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
1404 } 1404 }
1405 static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){ 1405 static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){
1406 uint8_t halfH[88]; 1406 uint8_t halfH[88];
1407 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); 1407 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1408 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); 1408 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
1409 } 1409 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698