OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2002 Brian Foley | 2 * Copyright (c) 2002 Brian Foley |
3 * Copyright (c) 2002 Dieter Shirley | 3 * Copyright (c) 2002 Dieter Shirley |
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | 4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
5 * | 5 * |
6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
7 * | 7 * |
8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
11 * version 2.1 of the License, or (at your option) any later version. | 11 * version 2.1 of the License, or (at your option) any later version. |
12 * | 12 * |
13 * FFmpeg is distributed in the hope that it will be useful, | 13 * FFmpeg is distributed in the hope that it will be useful, |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 * Lesser General Public License for more details. | 16 * Lesser General Public License for more details. |
17 * | 17 * |
18 * You should have received a copy of the GNU Lesser General Public | 18 * You should have received a copy of the GNU Lesser General Public |
19 * License along with FFmpeg; if not, write to the Free Software | 19 * License along with FFmpeg; if not, write to the Free Software |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 */ | 21 */ |
22 | 22 |
23 #include "libavcodec/dsputil.h" | 23 #include "libavcodec/dsputil.h" |
24 | 24 |
25 #include "dsputil_ppc.h" | 25 #include "dsputil_ppc.h" |
26 | 26 |
27 #include "dsputil_altivec.h" | 27 #include "dsputil_altivec.h" |
28 | 28 |
29 void fdct_altivec(int16_t *block); | |
30 void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, | |
31 int x16, int y16, int rounder); | |
32 void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |
33 void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |
34 | |
35 void ff_vp3_idct_altivec(DCTELEM *block); | |
36 void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |
37 void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |
38 | |
39 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); | |
40 | |
41 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
42 void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
43 void float_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
44 void int_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
45 | |
46 int mm_flags = 0; | 29 int mm_flags = 0; |
47 | 30 |
48 int mm_support(void) | 31 int mm_support(void) |
49 { | 32 { |
50 int result = 0; | 33 int result = 0; |
51 #if HAVE_ALTIVEC | 34 #if HAVE_ALTIVEC |
52 if (has_altivec()) { | 35 if (has_altivec()) { |
53 result |= FF_MM_ALTIVEC; | 36 result |= FF_MM_ALTIVEC; |
54 } | 37 } |
55 #endif /* result */ | 38 #endif /* result */ |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
126 update 24/06/2003: Apple released the G5 yesterday, with a PPC970. Cache line | 109 update 24/06/2003: Apple released the G5 yesterday, with a PPC970. Cache line |
127 size: 128 bytes. Oops. | 110 size: 128 bytes. Oops. |
128 The semantics of dcbz were changed: it now always clears 32 bytes, so the function | 111 The semantics of dcbz were changed: it now always clears 32 bytes, so the function |
129 below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl, | 112 below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl, |
130 which is defined to clear a whole cache line (as dcbz did before). So we can still | 113 which is defined to clear a whole cache line (as dcbz did before). So we can still |
131 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. | 114 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. |
132 | 115 |
133 see <http://developer.apple.com/technotes/tn/tn2087.html> | 116 see <http://developer.apple.com/technotes/tn/tn2087.html> |
134 and <http://developer.apple.com/technotes/tn/tn2086.html> | 117 and <http://developer.apple.com/technotes/tn/tn2086.html> |
135 */ | 118 */ |
136 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) | 119 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks) |
137 { | 120 { |
138 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); | 121 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); |
139 register int misal = ((unsigned long)blocks & 0x00000010); | 122 register int misal = ((unsigned long)blocks & 0x00000010); |
140 register int i = 0; | 123 register int i = 0; |
141 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | 124 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); |
142 #if 1 | 125 #if 1 |
143 if (misal) { | 126 if (misal) { |
144 ((unsigned long*)blocks)[0] = 0L; | 127 ((unsigned long*)blocks)[0] = 0L; |
145 ((unsigned long*)blocks)[1] = 0L; | 128 ((unsigned long*)blocks)[1] = 0L; |
146 ((unsigned long*)blocks)[2] = 0L; | 129 ((unsigned long*)blocks)[2] = 0L; |
(...skipping 12 matching lines...) Expand all Loading... |
159 } | 142 } |
160 #else | 143 #else |
161 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 144 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
162 #endif | 145 #endif |
163 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | 146 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); |
164 } | 147 } |
165 | 148 |
166 /* same as above, when dcbzl clear a whole 128B cache line | 149 /* same as above, when dcbzl clear a whole 128B cache line |
167 i.e. the PPC970 aka G5 */ | 150 i.e. the PPC970 aka G5 */ |
168 #if HAVE_DCBZL | 151 #if HAVE_DCBZL |
169 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | 152 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
170 { | 153 { |
171 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); | 154 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); |
172 register int misal = ((unsigned long)blocks & 0x0000007f); | 155 register int misal = ((unsigned long)blocks & 0x0000007f); |
173 register int i = 0; | 156 register int i = 0; |
174 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | 157 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); |
175 #if 1 | 158 #if 1 |
176 if (misal) { | 159 if (misal) { |
177 // we could probably also optimize this case, | 160 // we could probably also optimize this case, |
178 // but there's not much point as the machines | 161 // but there's not much point as the machines |
179 // aren't available yet (2003-06-26) | 162 // aren't available yet (2003-06-26) |
180 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 163 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
181 } | 164 } |
182 else | 165 else |
183 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { | 166 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { |
184 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory")
; | 167 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory")
; |
185 } | 168 } |
186 #else | 169 #else |
187 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 170 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
188 #endif | 171 #endif |
189 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); | 172 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); |
190 } | 173 } |
191 #else | 174 #else |
192 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | 175 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
193 { | 176 { |
194 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 177 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
195 } | 178 } |
196 #endif | 179 #endif |
197 | 180 |
#if HAVE_DCBZL
/*
 * Report how many bytes a single dcbzl instruction sets to zero.
 *
 * A scratch buffer is filled with 0xFF, dcbzl is issued on the address in
 * the middle of that buffer, and the bytes that read back as zero are
 * counted.
 *
 * update 24/06/2003: dcbzl is used instead of dcbz to get the intended
 * effect (Apple "fixed" dcbz). Unfortunately this cannot be used unless
 * the assembler knows about dcbzl.
 *
 * Returns the number of zeroed bytes, or 0 if the scratch buffer could
 * not be allocated.
 */
static long check_dcbzl_effect(void)
{
    enum { FAKEDATA_SIZE = 1024, FAKEDATA_MIDDLE = FAKEDATA_SIZE / 2 };
    char *fakedata = av_malloc(FAKEDATA_SIZE);
    char *fakedata_middle;
    long zero = 0;
    long i;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = fakedata + FAKEDATA_MIDDLE;

    memset(fakedata, 0xFF, FAKEDATA_SIZE);

    /* The constraint "b" seems to mean "address base register" in
       gcc-3.3 / RS/6000 speak; it seems to avoid using r0.
       The "memory" clobber is required: dcbzl modifies the buffer behind
       the compiler's back, and without the clobber the compiler is free to
       assume every byte still holds the 0xFF written by memset() above. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < FAKEDATA_SIZE; i++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
#else
/* No dcbzl support in this build: report that nothing gets cleared. */
static long check_dcbzl_effect(void)
{
    return 0;
}
#endif
239 | 222 |
240 static void prefetch_ppc(void *mem, int stride, int h) | 223 static void prefetch_ppc(void *mem, int stride, int h) |
241 { | 224 { |
242 register const uint8_t *p = mem; | 225 register const uint8_t *p = mem; |
243 do { | 226 do { |
244 __asm__ volatile ("dcbt 0,%0" : : "r" (p)); | 227 __asm__ volatile ("dcbt 0,%0" : : "r" (p)); |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
305 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | 288 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; |
306 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | 289 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; |
307 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | 290 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; |
308 } | 291 } |
309 } | 292 } |
310 } | 293 } |
311 #endif /* CONFIG_POWERPC_PERF */ | 294 #endif /* CONFIG_POWERPC_PERF */ |
312 } | 295 } |
313 #endif /* HAVE_ALTIVEC */ | 296 #endif /* HAVE_ALTIVEC */ |
314 } | 297 } |
OLD | NEW |