| OLD | NEW |
| 1 /* | 1 /* |
| 2 * MMX optimized DSP utils | 2 * MMX optimized DSP utils |
| 3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
| 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
| 5 * | 5 * |
| 6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
| 7 * | 7 * |
| 8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
| 9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
| 10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
| 11 * version 2.1 of the License, or (at your option) any later version. | 11 * version 2.1 of the License, or (at your option) any later version. |
| 12 * | 12 * |
| 13 * FFmpeg is distributed in the hope that it will be useful, | 13 * FFmpeg is distributed in the hope that it will be useful, |
| 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 * Lesser General Public License for more details. | 16 * Lesser General Public License for more details. |
| 17 * | 17 * |
| 18 * You should have received a copy of the GNU Lesser General Public | 18 * You should have received a copy of the GNU Lesser General Public |
| 19 * License along with FFmpeg; if not, write to the Free Software | 19 * License along with FFmpeg; if not, write to the Free Software |
| 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 * | 21 * |
| 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
| 23 */ | 23 */ |
| 24 | 24 |
| 25 #include "libavutil/cpu.h" |
| 25 #include "libavutil/x86_cpu.h" | 26 #include "libavutil/x86_cpu.h" |
| 26 #include "libavcodec/dsputil.h" | 27 #include "libavcodec/dsputil.h" |
| 27 #include "libavcodec/mpegvideo.h" | 28 #include "libavcodec/mpegvideo.h" |
| 28 #include "libavcodec/mathops.h" | 29 #include "libavcodec/mathops.h" |
| 29 #include "dsputil_mmx.h" | 30 #include "dsputil_mmx.h" |
| 30 | 31 |
| 31 | 32 |
| 32 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) | 33 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) |
| 33 { | 34 { |
| 34 __asm__ volatile( | 35 __asm__ volatile( |
| (...skipping 1308 matching lines...) |
| 1343 #undef DEF | 1344 #undef DEF |
| 1344 #undef SET_RND | 1345 #undef SET_RND |
| 1345 #undef SCALE_OFFSET | 1346 #undef SCALE_OFFSET |
| 1346 #undef PMULHRW | 1347 #undef PMULHRW |
| 1347 #undef PHADDD | 1348 #undef PHADDD |
| 1348 #endif //HAVE_SSSE3 | 1349 #endif //HAVE_SSSE3 |
| 1349 | 1350 |
| 1350 | 1351 |
| 1351 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) | 1352 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
| 1352 { | 1353 { |
| 1353 if (mm_flags & FF_MM_MMX) { | 1354 int mm_flags = av_get_cpu_flags(); |
| 1355 |
| 1356 if (mm_flags & AV_CPU_FLAG_MMX) { |
| 1354 const int dct_algo = avctx->dct_algo; | 1357 const int dct_algo = avctx->dct_algo; |
| 1355 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ | 1358 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ |
| 1356 if(mm_flags & FF_MM_SSE2){ | 1359 if(mm_flags & AV_CPU_FLAG_SSE2){ |
| 1357 c->fdct = ff_fdct_sse2; | 1360 c->fdct = ff_fdct_sse2; |
| 1358 }else if(mm_flags & FF_MM_MMX2){ | 1361 }else if(mm_flags & AV_CPU_FLAG_MMX2){ |
| 1359 c->fdct = ff_fdct_mmx2; | 1362 c->fdct = ff_fdct_mmx2; |
| 1360 }else{ | 1363 }else{ |
| 1361 c->fdct = ff_fdct_mmx; | 1364 c->fdct = ff_fdct_mmx; |
| 1362 } | 1365 } |
| 1363 } | 1366 } |
| 1364 | 1367 |
| 1365 c->get_pixels = get_pixels_mmx; | 1368 c->get_pixels = get_pixels_mmx; |
| 1366 c->diff_pixels = diff_pixels_mmx; | 1369 c->diff_pixels = diff_pixels_mmx; |
| 1367 c->pix_sum = pix_sum16_mmx; | 1370 c->pix_sum = pix_sum16_mmx; |
| 1368 | 1371 |
| 1369 c->diff_bytes= diff_bytes_mmx; | 1372 c->diff_bytes= diff_bytes_mmx; |
| 1370 c->sum_abs_dctelem= sum_abs_dctelem_mmx; | 1373 c->sum_abs_dctelem= sum_abs_dctelem_mmx; |
| 1371 | 1374 |
| 1372 c->hadamard8_diff[0]= hadamard8_diff16_mmx; | 1375 c->hadamard8_diff[0]= hadamard8_diff16_mmx; |
| 1373 c->hadamard8_diff[1]= hadamard8_diff_mmx; | 1376 c->hadamard8_diff[1]= hadamard8_diff_mmx; |
| 1374 | 1377 |
| 1375 c->pix_norm1 = pix_norm1_mmx; | 1378 c->pix_norm1 = pix_norm1_mmx; |
| 1376 c->sse[0] = (mm_flags & FF_MM_SSE2) ? sse16_sse2 : sse16_mmx; | 1379 c->sse[0] = (mm_flags & AV_CPU_FLAG_SSE2) ? sse16_sse2 : sse16_mmx; |
| 1377 c->sse[1] = sse8_mmx; | 1380 c->sse[1] = sse8_mmx; |
| 1378 c->vsad[4]= vsad_intra16_mmx; | 1381 c->vsad[4]= vsad_intra16_mmx; |
| 1379 | 1382 |
| 1380 c->nsse[0] = nsse16_mmx; | 1383 c->nsse[0] = nsse16_mmx; |
| 1381 c->nsse[1] = nsse8_mmx; | 1384 c->nsse[1] = nsse8_mmx; |
| 1382 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 1385 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 1383 c->vsad[0] = vsad16_mmx; | 1386 c->vsad[0] = vsad16_mmx; |
| 1384 } | 1387 } |
| 1385 | 1388 |
| 1386 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 1389 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 1387 c->try_8x8basis= try_8x8basis_mmx; | 1390 c->try_8x8basis= try_8x8basis_mmx; |
| 1388 } | 1391 } |
| 1389 c->add_8x8basis= add_8x8basis_mmx; | 1392 c->add_8x8basis= add_8x8basis_mmx; |
| 1390 | 1393 |
| 1391 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; | 1394 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; |
| 1392 | 1395 |
| 1393 | 1396 |
| 1394 if (mm_flags & FF_MM_MMX2) { | 1397 if (mm_flags & AV_CPU_FLAG_MMX2) { |
| 1395 c->sum_abs_dctelem= sum_abs_dctelem_mmx2; | 1398 c->sum_abs_dctelem= sum_abs_dctelem_mmx2; |
| 1396 c->hadamard8_diff[0]= hadamard8_diff16_mmx2; | 1399 c->hadamard8_diff[0]= hadamard8_diff16_mmx2; |
| 1397 c->hadamard8_diff[1]= hadamard8_diff_mmx2; | 1400 c->hadamard8_diff[1]= hadamard8_diff_mmx2; |
| 1398 c->vsad[4]= vsad_intra16_mmx2; | 1401 c->vsad[4]= vsad_intra16_mmx2; |
| 1399 | 1402 |
| 1400 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 1403 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 1401 c->vsad[0] = vsad16_mmx2; | 1404 c->vsad[0] = vsad16_mmx2; |
| 1402 } | 1405 } |
| 1403 | 1406 |
| 1404 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; | 1407 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; |
| 1405 } | 1408 } |
| 1406 | 1409 |
| 1407 if(mm_flags & FF_MM_SSE2){ | 1410 if(mm_flags & AV_CPU_FLAG_SSE2){ |
| 1408 c->get_pixels = get_pixels_sse2; | 1411 c->get_pixels = get_pixels_sse2; |
| 1409 c->sum_abs_dctelem= sum_abs_dctelem_sse2; | 1412 c->sum_abs_dctelem= sum_abs_dctelem_sse2; |
| 1410 c->hadamard8_diff[0]= hadamard8_diff16_sse2; | 1413 c->hadamard8_diff[0]= hadamard8_diff16_sse2; |
| 1411 c->hadamard8_diff[1]= hadamard8_diff_sse2; | 1414 c->hadamard8_diff[1]= hadamard8_diff_sse2; |
| 1412 } | 1415 } |
| 1413 | 1416 |
| 1414 if (CONFIG_LPC && mm_flags & (FF_MM_SSE2|FF_MM_SSE2SLOW)) { | 1417 if (CONFIG_LPC && mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) { |
| 1415 c->lpc_compute_autocorr = ff_lpc_compute_autocorr_sse2; | 1418 c->lpc_compute_autocorr = ff_lpc_compute_autocorr_sse2; |
| 1416 } | 1419 } |
| 1417 | 1420 |
| 1418 #if HAVE_SSSE3 | 1421 #if HAVE_SSSE3 |
| 1419 if(mm_flags & FF_MM_SSSE3){ | 1422 if(mm_flags & AV_CPU_FLAG_SSSE3){ |
| 1420 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 1423 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 1421 c->try_8x8basis= try_8x8basis_ssse3; | 1424 c->try_8x8basis= try_8x8basis_ssse3; |
| 1422 } | 1425 } |
| 1423 c->add_8x8basis= add_8x8basis_ssse3; | 1426 c->add_8x8basis= add_8x8basis_ssse3; |
| 1424 c->sum_abs_dctelem= sum_abs_dctelem_ssse3; | 1427 c->sum_abs_dctelem= sum_abs_dctelem_ssse3; |
| 1425 c->hadamard8_diff[0]= hadamard8_diff16_ssse3; | 1428 c->hadamard8_diff[0]= hadamard8_diff16_ssse3; |
| 1426 c->hadamard8_diff[1]= hadamard8_diff_ssse3; | 1429 c->hadamard8_diff[1]= hadamard8_diff_ssse3; |
| 1427 } | 1430 } |
| 1428 #endif | 1431 #endif |
| 1429 | 1432 |
| 1430 if(mm_flags & FF_MM_3DNOW){ | 1433 if(mm_flags & AV_CPU_FLAG_3DNOW){ |
| 1431 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 1434 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 1432 c->try_8x8basis= try_8x8basis_3dnow; | 1435 c->try_8x8basis= try_8x8basis_3dnow; |
| 1433 } | 1436 } |
| 1434 c->add_8x8basis= add_8x8basis_3dnow; | 1437 c->add_8x8basis= add_8x8basis_3dnow; |
| 1435 } | 1438 } |
| 1436 } | 1439 } |
| 1437 | 1440 |
| 1438 dsputil_init_pix_mmx(c, avctx); | 1441 dsputil_init_pix_mmx(c, avctx); |
| 1439 } | 1442 } |
| OLD | NEW |