OLD | NEW |
1 /* | 1 /* |
2 * aligned/packed access motion | 2 * aligned/packed access motion |
3 * | 3 * |
4 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> | 4 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> |
5 * | 5 * |
6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
7 * | 7 * |
8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
11 * version 2.1 of the License, or (at your option) any later version. | 11 * version 2.1 of the License, or (at your option) any later version. |
12 * | 12 * |
13 * FFmpeg is distributed in the hope that it will be useful, | 13 * FFmpeg is distributed in the hope that it will be useful, |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 * Lesser General Public License for more details. | 16 * Lesser General Public License for more details. |
17 * | 17 * |
18 * You should have received a copy of the GNU Lesser General Public | 18 * You should have received a copy of the GNU Lesser General Public |
19 * License along with FFmpeg; if not, write to the Free Software | 19 * License along with FFmpeg; if not, write to the Free Software |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 */ | 21 */ |
22 | 22 |
23 | 23 |
24 #include "libavcodec/avcodec.h" | 24 #include "libavcodec/avcodec.h" |
25 #include "libavcodec/dsputil.h" | 25 #include "libavcodec/dsputil.h" |
| 26 #include "dsputil_sh4.h" |
26 | 27 |
27 | 28 |
/*
 * 32-bit word accessors.
 * LP(p)  - casts p to uint32_t* and dereferences it; usable as an lvalue, so
 *          it serves for both loads and stores.
 * LPC(p) - same access through a const uint32_t*; present only in the NEW
 *          column, where it replaces LP for every read from ref.
 * Neither expansion is wrapped in outer parentheses.
 * NOTE(review): the cast presumes p is suitably aligned for a 32-bit access
 * and that accessing uint8_t data as uint32_t is acceptable to the build's
 * aliasing settings - confirm.
 */
28 #define LP(p) *(uint32_t*)(p) | 29 #define LP(p) *(uint32_t*)(p) |
| 30 #define LPC(p) *(const uint32_t*)(p) |
29 | 31 |
30 | 32 |
/*
 * UNPACK(ph,pl,tt0,tt1): combine two 32-bit words into a "high" and a "low"
 * partial sum.
 *   ph = ((tt0 & ~M) >> 2) + ((tt1 & ~M) >> 2)
 *   pl =  (tt0 &  M)       +  (tt1 &  M)          with M = BYTE_VEC32(0x03)
 * tt0 and tt1 are copied into the locals t0/t1 first, so each argument is
 * evaluated exactly once; ph and pl must be assignable lvalues.
 * The do { } while(0) wrapper makes the macro usable as a single statement.
 * NOTE(review): BYTE_VEC32 is defined outside this view; presumably it
 * replicates its byte argument into all four byte lanes, making ph/pl
 * per-byte quarter-sums and 2-bit remainders - confirm.
 */
31 #define UNPACK(ph,pl,tt0,tt1) do { \ | 33 #define UNPACK(ph,pl,tt0,tt1) do { \ |
32 uint32_t t0,t1; t0=tt0;t1=tt1; \ | 34 uint32_t t0,t1; t0=tt0;t1=tt1; \ |
33 ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \ | 35 ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \ |
34 pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0) | 36 pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0) |
35 | 37 |
/*
 * rnd_PACK / no_rnd_PACK (ph,pl,nph,npl): recombine two (high, low) pairs as
 * produced by UNPACK into one 32-bit word:
 *   ph + nph + (((pl + npl + K) >> 2) & BYTE_VEC32(0x03))
 * The two macros differ only in K: BYTE_VEC32(0x02) for rnd_PACK and
 * BYTE_VEC32(0x01) for no_rnd_PACK.
 * The expansions are not wrapped in outer parentheses, so they are only safe
 * where the surrounding expression cannot re-associate them.
 * NOTE(review): read together with UNPACK this looks like a per-byte
 * (a+b+c+d+k)>>2 over four inputs with k = 2 or 1 - confirm against the
 * definition of BYTE_VEC32.
 * (Each definition below is wrapped over several rows by the diff viewer.)
 */
36 #define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02
))>>2) & BYTE_VEC32(0x03)) | 38 #define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02
))>>2) & BYTE_VEC32(0x03)) |
37 #define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VE
C32(0x01))>>2) & BYTE_VEC32(0x03)) | 39 #define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VE
C32(0x01))>>2) & BYTE_VEC32(0x03)) |
38 | 40 |
/*
 * MERGE1(a,b,ofs): build one 32-bit word from two consecutive words a and b.
 *   ofs == 0 -> a
 *   otherwise -> (a >> 8*ofs) | (b << (32 - 8*ofs))
 * MERGE2(a,b,ofs): the same construction one byte further on.
 *   ofs == 3 -> b
 *   otherwise -> (a >> 8*(ofs+1)) | (b << (32 - 8*(ofs+1)))
 * The explicit ofs==0 / ofs==3 cases keep the macros away from a shift count
 * of 32, which the general formula would otherwise produce.
 * Both expansions are bare conditional expressions without outer parentheses.
 * The active definitions are the little-endian ones; the big-endian variants
 * (shift directions swapped) are kept only inside a comment.
 */
39 /* little endian */ | 41 /* little endian */ |
40 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) ) | 42 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) ) |
41 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs
+1))) ) | 43 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs
+1))) ) |
42 /* big | 44 /* big |
43 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) ) | 45 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) ) |
44 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)
) ) | 46 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)
) ) |
45 */ | 47 */ |
46 | 48 |
47 | 49 |
/*
 * Store operations selected through the OP macro:
 *   put(d,s) - assigns s to d.
 *   avg(d,s) - assigns rnd_avg32(s,d) to d, so the previous value of d takes
 *              part in the result. rnd_avg32 is defined outside this view.
 * d must be an lvalue; in avg it is evaluated twice.
 */
48 #define put(d,s) d = s | 50 #define put(d,s) d = s |
49 #define avg(d,s) d = rnd_avg32(s,d) | 51 #define avg(d,s) d = rnd_avg32(s,d) |
50 | 52 |
/*
 * OP_C4(ofs): row loop handling one 32-bit word (4 bytes) per row.
 * Steps ref back by ofs, then for each row applies OP to the word at dest
 * with MERGE1 of the words at ref and ref+4, and advances ref and dest by
 * stride.
 * Relies on OP, dest, ref, stride and height existing at the expansion site,
 * and modifies ref, dest and height.
 * The do/while tests --height after the body, so the body always runs once
 * before height is examined.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 * NOTE(review): presumably ofs is the misalignment of ref (ref&3) so that
 * ref-ofs is word aligned - the call sites are elided from this view.
 */
51 #define OP_C4(ofs) \ | 53 #define OP_C4(ofs) \ |
52 ref-=ofs; \ | 54 ref-=ofs; \ |
53 do { \ | 55 do { \ |
54 OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \ | 56 OP(LP(dest),MERGE1(LPC(ref),LPC(ref+4),ofs)); \ |
55 ref+=stride; \ | 57 ref+=stride; \ |
56 dest+=stride; \ | 58 dest+=stride; \ |
57 } while(--height) | 59 } while(--height) |
58 | 60 |
/*
 * OP_C40(): variant of OP_C4 without an offset or merge step.
 * For each row applies OP to the word at dest with the word at ref, then
 * advances ref and dest by stride; repeats while --height is non-zero.
 * Relies on OP, dest, ref, stride and height existing at the expansion site.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 */
59 #define OP_C40() \ | 61 #define OP_C40() \ |
60 do { \ | 62 do { \ |
61 OP(LP(dest),LP(ref)); \ | 63 OP(LP(dest),LPC(ref)); \ |
62 ref+=stride; \ | 64 ref+=stride; \ |
63 dest+=stride; \ | 65 dest+=stride; \ |
64 } while(--height) | 66 } while(--height) |
65 | 67 |
66 | 68 |
67 #define OP put | 69 #define OP put |
68 | 70 |
69 static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int
height) | 71 static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int
height) |
70 { | 72 { |
71 switch((int)ref&3){ | 73 switch((int)ref&3){ |
(...skipping 17 matching lines...) Expand all Loading... |
89 } | 91 } |
90 } | 92 } |
91 | 93 |
92 #undef OP | 94 #undef OP |
93 | 95 |
/*
 * OP_C(ofs,sz,avg2): row loop that writes two words per row, or four when
 * sz==16, each built with MERGE1 from a pair of consecutive source words.
 * ref is first stepped back by ofs. Within a row t0 and t1 are reloaded
 * alternately, so every source word is loaded once and reused as the second
 * operand of one merge and the first operand of the next. A row reads the
 * words at ref+0..ref+8 (ref+0..ref+16 when sz==16).
 * avg2 is accepted but not referenced in this macro.
 * Relies on OP, dest, ref, stride and height at the expansion site; the
 * do/while runs the body once before --height is tested.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 */
94 #define OP_C(ofs,sz,avg2) \ | 96 #define OP_C(ofs,sz,avg2) \ |
95 { \ | 97 { \ |
96 ref-=ofs; \ | 98 ref-=ofs; \ |
97 do { \ | 99 do { \ |
98 uint32_t t0,t1; \ | 100 uint32_t t0,t1; \ |
99 t0 = LP(ref+0); \ | 101 t0 = LPC(ref+0); \ |
100 t1 = LP(ref+4); \ | 102 t1 = LPC(ref+4); \ |
101 OP(LP(dest+0), MERGE1(t0,t1,ofs)); \ | 103 OP(LP(dest+0), MERGE1(t0,t1,ofs)); \ |
102 t0 = LP(ref+8); \ | 104 t0 = LPC(ref+8); \ |
103 OP(LP(dest+4), MERGE1(t1,t0,ofs)); \ | 105 OP(LP(dest+4), MERGE1(t1,t0,ofs)); \ |
104 if (sz==16) { \ | 106 if (sz==16) { \ |
105 t1 = LP(ref+12); \ | 107 t1 = LPC(ref+12); \ |
106 OP(LP(dest+8), MERGE1(t0,t1,ofs)); \ | 108 OP(LP(dest+8), MERGE1(t0,t1,ofs)); \ |
107 t0 = LP(ref+16); \ | 109 t0 = LPC(ref+16); \ |
108 OP(LP(dest+12), MERGE1(t1,t0,ofs)); \ | 110 OP(LP(dest+12), MERGE1(t1,t0,ofs)); \ |
109 } \ | 111 } \ |
110 ref+=stride; \ | 112 ref+=stride; \ |
111 dest+= stride; \ | 113 dest+= stride; \ |
112 } while(--height); \ | 114 } while(--height); \ |
113 } | 115 } |
114 | 116 |
115 /* aligned */ | 117 /* aligned */ |
/*
 * OP_C0(sz,avg2): row loop without offset or merge step.
 * For each row applies OP word-by-word from ref to dest: two words, or four
 * when sz==16; then advances ref and dest by stride.
 * avg2 is accepted but not referenced in this macro.
 * Relies on OP, dest, ref, stride and height at the expansion site; the
 * do/while runs the body once before --height is tested.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 */
116 #define OP_C0(sz,avg2) \ | 118 #define OP_C0(sz,avg2) \ |
117 { \ | 119 { \ |
118 do { \ | 120 do { \ |
119 OP(LP(dest+0), LP(ref+0)); \ | 121 OP(LP(dest+0), LPC(ref+0)); \ |
120 OP(LP(dest+4), LP(ref+4)); \ | 122 OP(LP(dest+4), LPC(ref+4)); \ |
121 if (sz==16) { \ | 123 if (sz==16) { \ |
122 OP(LP(dest+8), LP(ref+8)); \ | 124 OP(LP(dest+8), LPC(ref+8)); \ |
123 OP(LP(dest+12), LP(ref+12)); \ | 125 OP(LP(dest+12), LPC(ref+12)); \ |
124 } \ | 126 } \ |
125 ref+=stride; \ | 127 ref+=stride; \ |
126 dest+= stride; \ | 128 dest+= stride; \ |
127 } while(--height); \ | 129 } while(--height); \ |
128 } | 130 } |
129 | 131 |
/*
 * OP_X(ofs,sz,avg2): row loop combining each merged source word with the
 * word one byte further on.
 * For every output word it applies OP to
 *   avg2(MERGE1(w_i, w_i+1, ofs), MERGE2(w_i, w_i+1, ofs))
 * where w_i, w_i+1 are consecutive source words; MERGE2 uses offset ofs+1.
 * ref is first stepped back by ofs. Two words are written per row, four when
 * sz==16; t0 and t1 are reloaded alternately so each source word is loaded
 * once per row. A row reads the words at ref+0..ref+8 (ref+0..ref+16 when
 * sz==16).
 * avg2 is the two-operand combining macro/function supplied by the caller.
 * Relies on OP, dest, ref, stride and height at the expansion site; the
 * do/while runs the body once before --height is tested.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 */
130 #define OP_X(ofs,sz,avg2) \ | 132 #define OP_X(ofs,sz,avg2) \ |
131 { \ | 133 { \ |
132 ref-=ofs; \ | 134 ref-=ofs; \ |
133 do { \ | 135 do { \ |
134 uint32_t t0,t1; \ | 136 uint32_t t0,t1; \ |
135 t0 = LP(ref+0); \ | 137 t0 = LPC(ref+0); \ |
136 t1 = LP(ref+4); \ | 138 t1 = LPC(ref+4); \ |
137 OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ | 139 OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ |
138 t0 = LP(ref+8); \ | 140 t0 = LPC(ref+8); \ |
139 OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ | 141 OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ |
140 if (sz==16) { \ | 142 if (sz==16) { \ |
141 t1 = LP(ref+12); \ | 143 t1 = LPC(ref+12); \ |
142 OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ | 144 OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ |
143 t0 = LP(ref+16); \ | 145 t0 = LPC(ref+16); \ |
144 OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ | 146 OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ |
145 } \ | 147 } \ |
146 ref+=stride; \ | 148 ref+=stride; \ |
147 dest+= stride; \ | 149 dest+= stride; \ |
148 } while(--height); \ | 150 } while(--height); \ |
149 } | 151 } |
150 | 152 |
151 /* aligned */ | 153 /* aligned */ |
/*
 * OP_Y0(sz,avg2): row loop combining each source row with the row below it,
 * without offset or merge step.
 * t0..t3 hold the words of the previous row (t2/t3 are loaded and used only
 * when sz==16). Each iteration advances ref by stride first, loads the
 * current row word by word into t, applies OP to dest with avg2(previous,
 * current), and then keeps the current word as the new "previous".
 * Because the first row is loaded before the loop, height+1 source rows are
 * read for height output rows.
 * Relies on OP, dest, ref, stride and height at the expansion site; the
 * do/while runs the body once before --height is tested.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 */
152 #define OP_Y0(sz,avg2) \ | 154 #define OP_Y0(sz,avg2) \ |
153 { \ | 155 { \ |
154 uint32_t t0,t1,t2,t3,t; \ | 156 uint32_t t0,t1,t2,t3,t; \ |
155 \ | 157 \ |
156 t0 = LP(ref+0); \ | 158 t0 = LPC(ref+0); \ |
157 t1 = LP(ref+4); \ | 159 t1 = LPC(ref+4); \ |
158 if (sz==16) { \ | 160 if (sz==16) { \ |
159 t2 = LP(ref+8); \ | 161 t2 = LPC(ref+8); \ |
160 t3 = LP(ref+12); \ | 162 t3 = LPC(ref+12); \ |
161 } \ | 163 } \ |
162 do { \ | 164 do { \ |
163 ref += stride; \ | 165 ref += stride; \ |
164 \ | 166 \ |
165 t = LP(ref+0); \ | 167 t = LPC(ref+0); \ |
166 OP(LP(dest+0), avg2(t0,t)); t0 = t; \ | 168 OP(LP(dest+0), avg2(t0,t)); t0 = t; \ |
167 t = LP(ref+4); \ | 169 t = LPC(ref+4); \ |
168 OP(LP(dest+4), avg2(t1,t)); t1 = t; \ | 170 OP(LP(dest+4), avg2(t1,t)); t1 = t; \ |
169 if (sz==16) { \ | 171 if (sz==16) { \ |
170 t = LP(ref+8); \ | 172 t = LPC(ref+8); \ |
171 OP(LP(dest+8), avg2(t2,t)); t2 = t; \ | 173 OP(LP(dest+8), avg2(t2,t)); t2 = t; \ |
172 t = LP(ref+12); \ | 174 t = LPC(ref+12); \ |
173 OP(LP(dest+12), avg2(t3,t)); t3 = t; \ | 175 OP(LP(dest+12), avg2(t3,t)); t3 = t; \ |
174 } \ | 176 } \ |
175 dest+= stride; \ | 177 dest+= stride; \ |
176 } while(--height); \ | 178 } while(--height); \ |
177 } | 179 } |
178 | 180 |
/*
 * OP_Y(ofs,sz,avg2): same row-with-next-row combination as OP_Y0, but every
 * source word is first assembled with MERGE1 from two consecutive words.
 * ref is stepped back by ofs. w0/w1 are the raw words, reloaded alternately;
 * t0..t3 hold the merged words of the previous row (t2/t3 only when sz==16)
 * and t the merged word of the current row. Each iteration advances ref by
 * stride, applies OP to dest with avg2(previous, current) per word, then
 * keeps the current word as the new "previous".
 * A row reads the words at ref+0..ref+8 (ref+0..ref+16 when sz==16), and
 * height+1 source rows are read for height output rows.
 * Relies on OP, dest, ref, stride and height at the expansion site; the
 * do/while runs the body once before --height is tested.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 */
179 #define OP_Y(ofs,sz,avg2) \ | 181 #define OP_Y(ofs,sz,avg2) \ |
180 { \ | 182 { \ |
181 uint32_t t0,t1,t2,t3,t,w0,w1; \ | 183 uint32_t t0,t1,t2,t3,t,w0,w1; \ |
182 \ | 184 \ |
183 ref-=ofs; \ | 185 ref-=ofs; \ |
184 w0 = LP(ref+0); \ | 186 w0 = LPC(ref+0); \ |
185 w1 = LP(ref+4); \ | 187 w1 = LPC(ref+4); \ |
186 t0 = MERGE1(w0,w1,ofs); \ | 188 t0 = MERGE1(w0,w1,ofs); \ |
187 w0 = LP(ref+8); \ | 189 w0 = LPC(ref+8); \ |
188 t1 = MERGE1(w1,w0,ofs); \ | 190 t1 = MERGE1(w1,w0,ofs); \ |
189 if (sz==16) { \ | 191 if (sz==16) { \ |
190 w1 = LP(ref+12); \ | 192 w1 = LPC(ref+12); \ |
191 t2 = MERGE1(w0,w1,ofs); \ | 193 t2 = MERGE1(w0,w1,ofs); \ |
192 w0 = LP(ref+16); \ | 194 w0 = LPC(ref+16); \ |
193 t3 = MERGE1(w1,w0,ofs); \ | 195 t3 = MERGE1(w1,w0,ofs); \ |
194 } \ | 196 } \ |
195 do { \ | 197 do { \ |
196 ref += stride; \ | 198 ref += stride; \ |
197 \ | 199 \ |
198 w0 = LP(ref+0); \ | 200 w0 = LPC(ref+0); \ |
199 w1 = LP(ref+4); \ | 201 w1 = LPC(ref+4); \ |
200 t = MERGE1(w0,w1,ofs); \ | 202 t = MERGE1(w0,w1,ofs); \ |
201 OP(LP(dest+0), avg2(t0,t)); t0 = t; \ | 203 OP(LP(dest+0), avg2(t0,t)); t0 = t; \ |
202 w0 = LP(ref+8); \ | 204 w0 = LPC(ref+8); \ |
203 t = MERGE1(w1,w0,ofs); \ | 205 t = MERGE1(w1,w0,ofs); \ |
204 OP(LP(dest+4), avg2(t1,t)); t1 = t; \ | 206 OP(LP(dest+4), avg2(t1,t)); t1 = t; \ |
205 if (sz==16) { \ | 207 if (sz==16) { \ |
206 w1 = LP(ref+12); \ | 208 w1 = LPC(ref+12); \ |
207 t = MERGE1(w0,w1,ofs); \ | 209 t = MERGE1(w0,w1,ofs); \ |
208 OP(LP(dest+8), avg2(t2,t)); t2 = t; \ | 210 OP(LP(dest+8), avg2(t2,t)); t2 = t; \ |
209 w0 = LP(ref+16); \ | 211 w0 = LPC(ref+16); \ |
210 t = MERGE1(w1,w0,ofs); \ | 212 t = MERGE1(w1,w0,ofs); \ |
211 OP(LP(dest+12), avg2(t3,t)); t3 = t; \ | 213 OP(LP(dest+12), avg2(t3,t)); t3 = t; \ |
212 } \ | 214 } \ |
213 dest+=stride; \ | 215 dest+=stride; \ |
214 } while(--height); \ | 216 } while(--height); \ |
215 } | 217 } |
216 | 218 |
/*
 * OP_X0(sz,avg2)  - OP_X with ofs fixed to 0.
 * OP_XY0(sz,PACK) - OP_XY with ofs fixed to 0.
 * OP_XY(ofs,sz,PACK): row loop combining, for each output word, four inputs:
 * the merged word and the word one byte further on (MERGE1 / MERGE2), taken
 * from the previous row and from the current row.
 *   - ref is stepped back by ofs; w0/w1 are raw source words, reloaded
 *     alternately.
 *   - UNPACK turns each MERGE1/MERGE2 pair into a (high, low) pair.
 *     a0..a7 hold those pairs for the previous row (a4..a7 only when sz==16);
 *     t2/t3 hold the pair for the current row.
 *   - PACK(prev_high, prev_low, cur_high, cur_low) produces the output word,
 *     which is handed to OP; PACK is supplied by the caller (rnd_PACK and
 *     no_rnd_PACK have the matching four-argument shape).
 *   - After each word the current pair replaces the stored previous pair.
 * A row reads the words at ref+0..ref+8 (ref+0..ref+16 when sz==16), and
 * height+1 source rows are read for height output rows.
 * Relies on OP, dest, ref, stride and height at the expansion site; the
 * do/while runs the body once before --height is tested.
 * OLD column reads the source through LP; NEW column reads it through LPC.
 */
217 #define OP_X0(sz,avg2) OP_X(0,sz,avg2) | 219 #define OP_X0(sz,avg2) OP_X(0,sz,avg2) |
218 #define OP_XY0(sz,PACK) OP_XY(0,sz,PACK) | 220 #define OP_XY0(sz,PACK) OP_XY(0,sz,PACK) |
219 #define OP_XY(ofs,sz,PACK) \ | 221 #define OP_XY(ofs,sz,PACK) \ |
220 { \ | 222 { \ |
221 uint32_t t2,t3,w0,w1; \ | 223 uint32_t t2,t3,w0,w1; \ |
222 uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \ | 224 uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \ |
223 \ | 225 \ |
224 ref -= ofs; \ | 226 ref -= ofs; \ |
225 w0 = LP(ref+0); \ | 227 w0 = LPC(ref+0); \ |
226 w1 = LP(ref+4); \ | 228 w1 = LPC(ref+4); \ |
227 UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ | 229 UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ |
228 w0 = LP(ref+8); \ | 230 w0 = LPC(ref+8); \ |
229 UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ | 231 UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ |
230 if (sz==16) { \ | 232 if (sz==16) { \ |
231 w1 = LP(ref+12); \ | 233 w1 = LPC(ref+12); \ |
232 UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ | 234 UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ |
233 w0 = LP(ref+16); \ | 235 w0 = LPC(ref+16); \ |
234 UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ | 236 UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ |
235 } \ | 237 } \ |
236 do { \ | 238 do { \ |
237 ref+=stride; \ | 239 ref+=stride; \ |
238 w0 = LP(ref+0); \ | 240 w0 = LPC(ref+0); \ |
239 w1 = LP(ref+4); \ | 241 w1 = LPC(ref+4); \ |
240 UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ | 242 UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ |
241 OP(LP(dest+0),PACK(a0,a1,t2,t3)); \ | 243 OP(LP(dest+0),PACK(a0,a1,t2,t3)); \ |
242 a0 = t2; a1 = t3; \ | 244 a0 = t2; a1 = t3; \ |
243 w0 = LP(ref+8); \ | 245 w0 = LPC(ref+8); \ |
244 UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ | 246 UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ |
245 OP(LP(dest+4),PACK(a2,a3,t2,t3)); \ | 247 OP(LP(dest+4),PACK(a2,a3,t2,t3)); \ |
246 a2 = t2; a3 = t3; \ | 248 a2 = t2; a3 = t3; \ |
247 if (sz==16) { \ | 249 if (sz==16) { \ |
248 w1 = LP(ref+12); \ | 250 w1 = LPC(ref+12); \ |
249 UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ | 251 UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ |
250 OP(LP(dest+8),PACK(a4,a5,t2,t3)); \ | 252 OP(LP(dest+8),PACK(a4,a5,t2,t3)); \ |
251 a4 = t2; a5 = t3; \ | 253 a4 = t2; a5 = t3; \ |
252 w0 = LP(ref+16); \ | 254 w0 = LPC(ref+16); \ |
253 UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ | 255 UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ |
254 OP(LP(dest+12),PACK(a6,a7,t2,t3)); \ | 256 OP(LP(dest+12),PACK(a6,a7,t2,t3)); \ |
255 a6 = t2; a7 = t3; \ | 257 a6 = t2; a7 = t3; \ |
256 } \ | 258 } \ |
257 dest+=stride; \ | 259 dest+=stride; \ |
258 } while(--height); \ | 260 } while(--height); \ |
259 } | 261 } |
260 | 262 |
261 #define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \ | 263 #define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \ |
262 static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref,
\ | 264 static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref,
\ |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
421 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_sh4; | 423 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_sh4; |
422 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_sh4; | 424 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_sh4; |
423 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_sh4; | 425 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_sh4; |
424 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_sh4; | 426 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_sh4; |
425 | 427 |
426 c->gmc1 = gmc1_c; | 428 c->gmc1 = gmc1_c; |
427 c->gmc = gmc_c; | 429 c->gmc = gmc_c; |
428 | 430 |
429 #endif | 431 #endif |
430 } | 432 } |
OLD | NEW |