| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2002 Brian Foley | 2 * Copyright (c) 2002 Brian Foley |
| 3 * Copyright (c) 2002 Dieter Shirley | 3 * Copyright (c) 2002 Dieter Shirley |
| 4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | 4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
| 5 * | 5 * |
| 6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
| 7 * | 7 * |
| 8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
| 9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
| 10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
| 11 * version 2.1 of the License, or (at your option) any later version. | 11 * version 2.1 of the License, or (at your option) any later version. |
| 12 * | 12 * |
| 13 * FFmpeg is distributed in the hope that it will be useful, | 13 * FFmpeg is distributed in the hope that it will be useful, |
| 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 * Lesser General Public License for more details. | 16 * Lesser General Public License for more details. |
| 17 * | 17 * |
| 18 * You should have received a copy of the GNU Lesser General Public | 18 * You should have received a copy of the GNU Lesser General Public |
| 19 * License along with FFmpeg; if not, write to the Free Software | 19 * License along with FFmpeg; if not, write to the Free Software |
| 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 */ | 21 */ |
| 22 | 22 |
| 23 #include "libavcodec/dsputil.h" | 23 #include "libavcodec/dsputil.h" |
| 24 | 24 |
| 25 #include "dsputil_ppc.h" | 25 #include "dsputil_ppc.h" |
| 26 | 26 |
| 27 #include "dsputil_altivec.h" | 27 #include "dsputil_altivec.h" |
| 28 | 28 |
| 29 void fdct_altivec(int16_t *block); | |
| 30 void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, | |
| 31 int x16, int y16, int rounder); | |
| 32 void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |
| 33 void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |
| 34 | |
| 35 void ff_vp3_idct_altivec(DCTELEM *block); | |
| 36 void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |
| 37 void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |
| 38 | |
| 39 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); | |
| 40 | |
| 41 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
| 42 void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
| 43 void float_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
| 44 void int_init_altivec(DSPContext* c, AVCodecContext *avctx); | |
| 45 | |
| 46 int mm_flags = 0; | 29 int mm_flags = 0; |
| 47 | 30 |
| 48 int mm_support(void) | 31 int mm_support(void) |
| 49 { | 32 { |
| 50 int result = 0; | 33 int result = 0; |
| 51 #if HAVE_ALTIVEC | 34 #if HAVE_ALTIVEC |
| 52 if (has_altivec()) { | 35 if (has_altivec()) { |
| 53 result |= FF_MM_ALTIVEC; | 36 result |= FF_MM_ALTIVEC; |
| 54 } | 37 } |
| 55 #endif /* HAVE_ALTIVEC */ | 38 #endif /* HAVE_ALTIVEC */ |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 126 Update 24/06/2003: Apple released the G5 yesterday, with a PPC970. Cache line | 109 Update 24/06/2003: Apple released the G5 yesterday, with a PPC970. Cache line |
| 127 size: 128 bytes. Oops. | 110 size: 128 bytes. Oops. |
| 128 The semantics of dcbz were changed: it always clears 32 bytes, so the function | 111 The semantics of dcbz were changed: it always clears 32 bytes, so the function |
| 129 below will work, but will be slow. So I fixed check_dcbzl_effect to use dcbzl, | 112 below will work, but will be slow. So I fixed check_dcbzl_effect to use dcbzl, |
| 130 which is defined to clear a cache line (as dcbz did before). So we can still | 113 which is defined to clear a cache line (as dcbz did before). So we can still |
| 131 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. | 114 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. |
| 132 | 115 |
| 133 see <http://developer.apple.com/technotes/tn/tn2087.html> | 116 see <http://developer.apple.com/technotes/tn/tn2087.html> |
| 134 and <http://developer.apple.com/technotes/tn/tn2086.html> | 117 and <http://developer.apple.com/technotes/tn/tn2086.html> |
| 135 */ | 118 */ |
| 136 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) | 119 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks) |
| 137 { | 120 { |
| 138 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); | 121 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); |
| 139 register int misal = ((unsigned long)blocks & 0x00000010); | 122 register int misal = ((unsigned long)blocks & 0x00000010); |
| 140 register int i = 0; | 123 register int i = 0; |
| 141 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | 124 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); |
| 142 #if 1 | 125 #if 1 |
| 143 if (misal) { | 126 if (misal) { |
| 144 ((unsigned long*)blocks)[0] = 0L; | 127 ((unsigned long*)blocks)[0] = 0L; |
| 145 ((unsigned long*)blocks)[1] = 0L; | 128 ((unsigned long*)blocks)[1] = 0L; |
| 146 ((unsigned long*)blocks)[2] = 0L; | 129 ((unsigned long*)blocks)[2] = 0L; |
| (...skipping 12 matching lines...) Expand all Loading... |
| 159 } | 142 } |
| 160 #else | 143 #else |
| 161 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 144 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
| 162 #endif | 145 #endif |
| 163 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | 146 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); |
| 164 } | 147 } |
| 165 | 148 |
| 166 /* same as above, for when dcbzl clears a whole 128B cache line, | 149 /* same as above, for when dcbzl clears a whole 128B cache line, |
| 167 i.e. on the PPC970 aka G5 */ | 150 i.e. on the PPC970 aka G5 */ |
| 168 #if HAVE_DCBZL | 151 #if HAVE_DCBZL |
| 169 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | 152 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
| 170 { | 153 { |
| 171 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); | 154 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); |
| 172 register int misal = ((unsigned long)blocks & 0x0000007f); | 155 register int misal = ((unsigned long)blocks & 0x0000007f); |
| 173 register int i = 0; | 156 register int i = 0; |
| 174 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | 157 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); |
| 175 #if 1 | 158 #if 1 |
| 176 if (misal) { | 159 if (misal) { |
| 177 // we could probably also optimize this case, | 160 // we could probably also optimize this case, |
| 178 // but there's not much point as the machines | 161 // but there's not much point as the machines |
| 179 // aren't available yet (2003-06-26) | 162 // aren't available yet (2003-06-26) |
| 180 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 163 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
| 181 } | 164 } |
| 182 else | 165 else |
| 183 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { | 166 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { |
| 184 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory")
; | 167 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory")
; |
| 185 } | 168 } |
| 186 #else | 169 #else |
| 187 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 170 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
| 188 #endif | 171 #endif |
| 189 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); | 172 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); |
| 190 } | 173 } |
| 191 #else | 174 #else |
| 192 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | 175 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
| 193 { | 176 { |
| 194 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 177 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
| 195 } | 178 } |
| 196 #endif | 179 #endif |
| 197 | 180 |
| 198 #if HAVE_DCBZL | 181 #if HAVE_DCBZL |
| 199 /* check_dcbzl_effect reports how many bytes are set to 0 by dcbzl */ | 182 /* check_dcbzl_effect reports how many bytes are set to 0 by dcbzl */ |
| 200 /* update 24/06/2003: replaced dcbz with dcbzl to get | 183 /* update 24/06/2003: replaced dcbz with dcbzl to get |
| 201 the intended effect (Apple "fixed" dcbz); | 184 the intended effect (Apple "fixed" dcbz); |
| 202 unfortunately, this cannot be used unless the assembler | 185 unfortunately, this cannot be used unless the assembler |
| 203 knows about dcbzl ... */ | 186 knows about dcbzl ... */ |
| 204 long check_dcbzl_effect(void) | 187 static long check_dcbzl_effect(void) |
| 205 { | 188 { |
| 206 register char *fakedata = av_malloc(1024); | 189 register char *fakedata = av_malloc(1024); |
| 207 register char *fakedata_middle; | 190 register char *fakedata_middle; |
| 208 register long zero = 0; | 191 register long zero = 0; |
| 209 register long i = 0; | 192 register long i = 0; |
| 210 long count = 0; | 193 long count = 0; |
| 211 | 194 |
| 212 if (!fakedata) { | 195 if (!fakedata) { |
| 213 return 0L; | 196 return 0L; |
| 214 } | 197 } |
| 215 | 198 |
| 216 fakedata_middle = (fakedata + 512); | 199 fakedata_middle = (fakedata + 512); |
| 217 | 200 |
| 218 memset(fakedata, 0xFF, 1024); | 201 memset(fakedata, 0xFF, 1024); |
| 219 | 202 |
| 220 /* Below, the constraint "b" seems to mean "address base register" | 203 /* Below, the constraint "b" seems to mean "address base register" |
| 221 in gcc-3.3 / RS/6000 speak. It seems to avoid using r0, so... */ | 204 in gcc-3.3 / RS/6000 speak. It seems to avoid using r0, so... */ |
| 222 __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); | 205 __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); |
| 223 | 206 |
| 224 for (i = 0; i < 1024 ; i ++) { | 207 for (i = 0; i < 1024 ; i ++) { |
| 225 if (fakedata[i] == (char)0) | 208 if (fakedata[i] == (char)0) |
| 226 count++; | 209 count++; |
| 227 } | 210 } |
| 228 | 211 |
| 229 av_free(fakedata); | 212 av_free(fakedata); |
| 230 | 213 |
| 231 return count; | 214 return count; |
| 232 } | 215 } |
| 233 #else | 216 #else |
| 234 long check_dcbzl_effect(void) | 217 static long check_dcbzl_effect(void) |
| 235 { | 218 { |
| 236 return 0; | 219 return 0; |
| 237 } | 220 } |
| 238 #endif | 221 #endif |
| 239 | 222 |
| 240 static void prefetch_ppc(void *mem, int stride, int h) | 223 static void prefetch_ppc(void *mem, int stride, int h) |
| 241 { | 224 { |
| 242 register const uint8_t *p = mem; | 225 register const uint8_t *p = mem; |
| 243 do { | 226 do { |
| 244 __asm__ volatile ("dcbt 0,%0" : : "r" (p)); | 227 __asm__ volatile ("dcbt 0,%0" : : "r" (p)); |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 305 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | 288 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; |
| 306 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | 289 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; |
| 307 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | 290 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; |
| 308 } | 291 } |
| 309 } | 292 } |
| 310 } | 293 } |
| 311 #endif /* CONFIG_POWERPC_PERF */ | 294 #endif /* CONFIG_POWERPC_PERF */ |
| 312 } | 295 } |
| 313 #endif /* HAVE_ALTIVEC */ | 296 #endif /* HAVE_ALTIVEC */ |
| 314 } | 297 } |
| OLD | NEW |