Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(220)

Side by Side Diff: patched-ffmpeg-mt/libavcodec/x86/dsputilenc_mmx.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: '' Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * MMX optimized DSP utils 2 * MMX optimized DSP utils
3 * Copyright (c) 2000, 2001 Fabrice Bellard 3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 * 5 *
6 * This file is part of FFmpeg. 6 * This file is part of FFmpeg.
7 * 7 *
8 * FFmpeg is free software; you can redistribute it and/or 8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public 9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 1045 matching lines...) Expand 10 before | Expand all | Expand 10 after
1056 "movhlps "#a", "#t" \n\t"\ 1056 "movhlps "#a", "#t" \n\t"\
1057 "paddusw "#t", "#a" \n\t"\ 1057 "paddusw "#t", "#a" \n\t"\
1058 "pshuflw $0x0E, "#a", "#t" \n\t"\ 1058 "pshuflw $0x0E, "#a", "#t" \n\t"\
1059 "paddusw "#t", "#a" \n\t"\ 1059 "paddusw "#t", "#a" \n\t"\
1060 "pshuflw $0x01, "#a", "#t" \n\t"\ 1060 "pshuflw $0x01, "#a", "#t" \n\t"\
1061 "paddusw "#t", "#a" \n\t"\ 1061 "paddusw "#t", "#a" \n\t"\
1062 "movd "#a", "#dst" \n\t"\ 1062 "movd "#a", "#dst" \n\t"\
1063 1063
1064 #define HADAMARD8_DIFF_MMX(cpu) \ 1064 #define HADAMARD8_DIFF_MMX(cpu) \
1065 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ 1065 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\
1066 DECLARE_ALIGNED_8(uint64_t, temp)[13];\ 1066 DECLARE_ALIGNED(8, uint64_t, temp)[13];\
1067 int sum;\ 1067 int sum;\
1068 \ 1068 \
1069 assert(h==8);\ 1069 assert(h==8);\
1070 \ 1070 \
1071 DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\ 1071 DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\
1072 \ 1072 \
1073 __asm__ volatile(\ 1073 __asm__ volatile(\
1074 HADAMARD48\ 1074 HADAMARD48\
1075 \ 1075 \
1076 "movq %%mm7, 96(%1) \n\t"\ 1076 "movq %%mm7, 96(%1) \n\t"\
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
1139 \ 1139 \
1140 : "=r" (sum)\ 1140 : "=r" (sum)\
1141 : "r"(temp)\ 1141 : "r"(temp)\
1142 );\ 1142 );\
1143 return sum&0xFFFF;\ 1143 return sum&0xFFFF;\
1144 }\ 1144 }\
1145 WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu) 1145 WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu)
1146 1146
1147 #define HADAMARD8_DIFF_SSE2(cpu) \ 1147 #define HADAMARD8_DIFF_SSE2(cpu) \
1148 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ 1148 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\
1149 DECLARE_ALIGNED_16(uint64_t, temp)[4];\ 1149 DECLARE_ALIGNED(16, uint64_t, temp)[4];\
1150 int sum;\ 1150 int sum;\
1151 \ 1151 \
1152 assert(h==8);\ 1152 assert(h==8);\
1153 \ 1153 \
1154 DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\ 1154 DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\
1155 \ 1155 \
1156 __asm__ volatile(\ 1156 __asm__ volatile(\
1157 HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\ 1157 HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\
1158 TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\ 1158 TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\
1159 HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\ 1159 HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
1341 #include "dsputil_mmx_qns_template.c" 1341 #include "dsputil_mmx_qns_template.c"
1342 1342
1343 #undef DEF 1343 #undef DEF
1344 #undef SET_RND 1344 #undef SET_RND
1345 #undef SCALE_OFFSET 1345 #undef SCALE_OFFSET
1346 #undef PMULHRW 1346 #undef PMULHRW
1347 #undef PHADDD 1347 #undef PHADDD
1348 #endif //HAVE_SSSE3 1348 #endif //HAVE_SSSE3
1349 1349
1350 1350
1351 void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
1352 double *autoc);
1353
1354
1355 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) 1351 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
1356 { 1352 {
1357 if (mm_flags & FF_MM_MMX) { 1353 if (mm_flags & FF_MM_MMX) {
1358 const int dct_algo = avctx->dct_algo; 1354 const int dct_algo = avctx->dct_algo;
1359 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ 1355 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
1360 if(mm_flags & FF_MM_SSE2){ 1356 if(mm_flags & FF_MM_SSE2){
1361 c->fdct = ff_fdct_sse2; 1357 c->fdct = ff_fdct_sse2;
1362 }else if(mm_flags & FF_MM_MMX2){ 1358 }else if(mm_flags & FF_MM_MMX2){
1363 c->fdct = ff_fdct_mmx2; 1359 c->fdct = ff_fdct_mmx2;
1364 }else{ 1360 }else{
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
1433 if(mm_flags & FF_MM_3DNOW){ 1429 if(mm_flags & FF_MM_3DNOW){
1434 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 1430 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
1435 c->try_8x8basis= try_8x8basis_3dnow; 1431 c->try_8x8basis= try_8x8basis_3dnow;
1436 } 1432 }
1437 c->add_8x8basis= add_8x8basis_3dnow; 1433 c->add_8x8basis= add_8x8basis_3dnow;
1438 } 1434 }
1439 } 1435 }
1440 1436
1441 dsputil_init_pix_mmx(c, avctx); 1437 dsputil_init_pix_mmx(c, avctx);
1442 } 1438 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698