OLD | NEW |
1 /* | 1 /* |
2 * MMX optimized DSP utils | 2 * MMX optimized DSP utils |
3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
5 * | 5 * |
6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
7 * | 7 * |
8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
(...skipping 1045 matching lines...)
1056 "movhlps "#a", "#t" \n\t"\ | 1056 "movhlps "#a", "#t" \n\t"\ |
1057 "paddusw "#t", "#a" \n\t"\ | 1057 "paddusw "#t", "#a" \n\t"\ |
1058 "pshuflw $0x0E, "#a", "#t" \n\t"\ | 1058 "pshuflw $0x0E, "#a", "#t" \n\t"\ |
1059 "paddusw "#t", "#a" \n\t"\ | 1059 "paddusw "#t", "#a" \n\t"\ |
1060 "pshuflw $0x01, "#a", "#t" \n\t"\ | 1060 "pshuflw $0x01, "#a", "#t" \n\t"\ |
1061 "paddusw "#t", "#a" \n\t"\ | 1061 "paddusw "#t", "#a" \n\t"\ |
1062 "movd "#a", "#dst" \n\t"\ | 1062 "movd "#a", "#dst" \n\t"\ |
1063 | 1063 |
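The reduction ending at line 1062 is a horizontal sum of eight unsigned 16-bit lanes: the high 64 bits are folded onto the low half with movhlps, then folded again by 32 and by 16 bits with pshuflw, accumulating with saturating paddusw. A hedged SSE2-intrinsics sketch of the same fold (hsum_epu16 is an illustrative name, not an FFmpeg function; the &0xFFFF mirrors the mask applied at the call site in "return sum&0xFFFF"):

    #include <emmintrin.h>

    /* Illustrative equivalent of the asm reduction above; saturating
     * adds (paddusw) are kept to match the original instructions. */
    static int hsum_epu16(__m128i a)
    {
        __m128i t;
        t = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(a),
                                           _mm_castsi128_ps(a))); /* movhlps       */
        a = _mm_adds_epu16(a, t);                                 /* paddusw       */
        t = _mm_shufflelo_epi16(a, 0x0E);                         /* pshuflw $0x0E */
        a = _mm_adds_epu16(a, t);                                 /* paddusw       */
        t = _mm_shufflelo_epi16(a, 0x01);                         /* pshuflw $0x01 */
        a = _mm_adds_epu16(a, t);                                 /* paddusw       */
        return _mm_cvtsi128_si32(a) & 0xFFFF;                     /* movd + mask   */
    }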
1064 #define HADAMARD8_DIFF_MMX(cpu) \ | 1064 #define HADAMARD8_DIFF_MMX(cpu) \ |
1065 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ | 1065 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ |
1066 DECLARE_ALIGNED_8(uint64_t, temp)[13];\ | 1066 DECLARE_ALIGNED(8, uint64_t, temp)[13];\ |
1067 int sum;\ | 1067 int sum;\ |
1068 \ | 1068 \ |
1069 assert(h==8);\ | 1069 assert(h==8);\ |
1070 \ | 1070 \ |
1071 DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\ | 1071 DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\ |
1072 \ | 1072 \ |
1073 __asm__ volatile(\ | 1073 __asm__ volatile(\ |
1074 HADAMARD48\ | 1074 HADAMARD48\ |
1075 \ | 1075 \ |
1076 "movq %%mm7, 96(%1) \n\t"\ | 1076 "movq %%mm7, 96(%1) \n\t"\ |
(...skipping 62 matching lines...)
1139 \ | 1139 \ |
1140 : "=r" (sum)\ | 1140 : "=r" (sum)\ |
1141 : "r"(temp)\ | 1141 : "r"(temp)\ |
1142 );\ | 1142 );\ |
1143 return sum&0xFFFF;\ | 1143 return sum&0xFFFF;\ |
1144 }\ | 1144 }\ |
1145 WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu) | 1145 WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu) |
1146 | 1146 |
1147 #define HADAMARD8_DIFF_SSE2(cpu) \ | 1147 #define HADAMARD8_DIFF_SSE2(cpu) \ |
1148 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ | 1148 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ |
1149 DECLARE_ALIGNED_16(uint64_t, temp)[4];\ | 1149 DECLARE_ALIGNED(16, uint64_t, temp)[4];\ |
1150 int sum;\ | 1150 int sum;\ |
1151 \ | 1151 \ |
1152 assert(h==8);\ | 1152 assert(h==8);\ |
1153 \ | 1153 \ |
1154 DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\ | 1154 DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\ |
1155 \ | 1155 \ |
1156 __asm__ volatile(\ | 1156 __asm__ volatile(\ |
1157 HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\ | 1157 HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\ |
1158 TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\ | 1158 TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\ |
1159 HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\ | 1159 HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\ |
(...skipping 181 matching lines...)
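Apart from renumbering, the substantive change in the hunks above is the switch from the fixed-width DECLARE_ALIGNED_8/DECLARE_ALIGNED_16 helpers to the parametrized DECLARE_ALIGNED(n, t, v) form. As a rough sketch, on GCC-style compilers the parametrized macro reduces to an alignment attribute; FFmpeg's real definition lives in libavutil, also covers other compilers, and should be consulted for the authoritative form:

    #include <stdint.h>

    /* Illustrative GCC-only expansion; the real macro also handles other
     * compilers (e.g. MSVC's __declspec(align(n))). */
    #define DECLARE_ALIGNED(n, t, v)  t __attribute__ ((aligned (n))) v

    /* Matches the new line 1149: a 16-byte aligned scratch array of four
     * uint64_t, suitable for aligned SSE2 loads and stores. */
    DECLARE_ALIGNED(16, uint64_t, temp)[4];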
1341 #include "dsputil_mmx_qns_template.c" | 1341 #include "dsputil_mmx_qns_template.c" |
1342 | 1342 |
1343 #undef DEF | 1343 #undef DEF |
1344 #undef SET_RND | 1344 #undef SET_RND |
1345 #undef SCALE_OFFSET | 1345 #undef SCALE_OFFSET |
1346 #undef PMULHRW | 1346 #undef PMULHRW |
1347 #undef PHADDD | 1347 #undef PHADDD |
1348 #endif //HAVE_SSSE3 | 1348 #endif //HAVE_SSSE3 |
1349 | 1349 |
1350 | 1350 |
1351 void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, | |
1352 double *autoc); | |
1353 | |
1354 | |
1355 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) | 1351 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
1356 { | 1352 { |
1357 if (mm_flags & FF_MM_MMX) { | 1353 if (mm_flags & FF_MM_MMX) { |
1358 const int dct_algo = avctx->dct_algo; | 1354 const int dct_algo = avctx->dct_algo; |
1359 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ | 1355 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ |
1360 if(mm_flags & FF_MM_SSE2){ | 1356 if(mm_flags & FF_MM_SSE2){ |
1361 c->fdct = ff_fdct_sse2; | 1357 c->fdct = ff_fdct_sse2; |
1362 }else if(mm_flags & FF_MM_MMX2){ | 1358 }else if(mm_flags & FF_MM_MMX2){ |
1363 c->fdct = ff_fdct_mmx2; | 1359 c->fdct = ff_fdct_mmx2; |
1364 }else{ | 1360 }else{ |
(...skipping 68 matching lines...)
1433 if(mm_flags & FF_MM_3DNOW){ | 1429 if(mm_flags & FF_MM_3DNOW){ |
1434 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 1430 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
1435 c->try_8x8basis= try_8x8basis_3dnow; | 1431 c->try_8x8basis= try_8x8basis_3dnow; |
1436 } | 1432 } |
1437 c->add_8x8basis= add_8x8basis_3dnow; | 1433 c->add_8x8basis= add_8x8basis_3dnow; |
1438 } | 1434 } |
1439 } | 1435 } |
1440 | 1436 |
1441 dsputil_init_pix_mmx(c, avctx); | 1437 dsputil_init_pix_mmx(c, avctx); |
1442 } | 1438 } |
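The init hunk keeps FFmpeg's usual runtime-dispatch pattern: the CPU feature flags are probed once, and the DSPContext function pointers are overwritten from the generic C version toward the most capable SIMD extension available. A minimal, self-contained sketch of that pattern under assumed names (ExampleDSP, the HAS_* flags, and the fdct_* stubs are illustrative, not FFmpeg API):

    #include <stdint.h>

    /* Hypothetical feature bits and context; FFmpeg uses FF_MM_* flags
     * and DSPContext instead. */
    enum { HAS_MMX = 1 << 0, HAS_MMX2 = 1 << 1, HAS_SSE2 = 1 << 2 };

    typedef struct {
        void (*fdct)(int16_t block[64]);
    } ExampleDSP;

    static void fdct_c(int16_t block[64])    { /* plain C fallback    */ }
    static void fdct_mmx(int16_t block[64])  { /* MMX implementation  */ }
    static void fdct_mmx2(int16_t block[64]) { /* MMX2 implementation */ }
    static void fdct_sse2(int16_t block[64]) { /* SSE2 implementation */ }

    static void example_dsp_init(ExampleDSP *c, int cpu_flags)
    {
        c->fdct = fdct_c;                              /* safe default       */
        if (cpu_flags & HAS_MMX)  c->fdct = fdct_mmx;
        if (cpu_flags & HAS_MMX2) c->fdct = fdct_mmx2;
        if (cpu_flags & HAS_SSE2) c->fdct = fdct_sse2; /* most specific wins */
    }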