OLD | NEW |
1 /* | 1 /* |
2 * VC-1 and WMV3 - DSP functions MMX-optimized | 2 * VC-1 and WMV3 - DSP functions MMX-optimized |
3 * Copyright (c) 2007 Christophe GISQUET <christophe.gisquet@free.fr> | 3 * Copyright (c) 2007 Christophe GISQUET <christophe.gisquet@free.fr> |
4 * | 4 * |
5 * Permission is hereby granted, free of charge, to any person | 5 * Permission is hereby granted, free of charge, to any person |
6 * obtaining a copy of this software and associated documentation | 6 * obtaining a copy of this software and associated documentation |
7 * files (the "Software"), to deal in the Software without | 7 * files (the "Software"), to deal in the Software without |
8 * restriction, including without limitation the rights to use, | 8 * restriction, including without limitation the rights to use, |
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell | 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 * copies of the Software, and to permit persons to whom the | 10 * copies of the Software, and to permit persons to whom the |
(...skipping 55 matching lines...) |
66 "punpcklbw %%mm0, %%mm"#R0" \n\t" \ | 66 "punpcklbw %%mm0, %%mm"#R0" \n\t" \ |
67 "movd (%0,%2), %%mm"#R3" \n\t" \ | 67 "movd (%0,%2), %%mm"#R3" \n\t" \ |
68 "psubw %%mm"#R0", %%mm"#R1" \n\t" \ | 68 "psubw %%mm"#R0", %%mm"#R1" \n\t" \ |
69 "punpcklbw %%mm0, %%mm"#R3" \n\t" \ | 69 "punpcklbw %%mm0, %%mm"#R3" \n\t" \ |
70 "paddw %%mm7, %%mm"#R1" \n\t" \ | 70 "paddw %%mm7, %%mm"#R1" \n\t" \ |
71 "psubw %%mm"#R3", %%mm"#R1" \n\t" \ | 71 "psubw %%mm"#R3", %%mm"#R1" \n\t" \ |
72 "psraw %4, %%mm"#R1" \n\t" \ | 72 "psraw %4, %%mm"#R1" \n\t" \ |
73 "movq %%mm"#R1", "#OFF"(%1) \n\t" \ | 73 "movq %%mm"#R1", "#OFF"(%1) \n\t" \ |
74 "add %2, %0 \n\t" | 74 "add %2, %0 \n\t" |
75 | 75 |
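Editor's note: the tail of this macro visible above (punpcklbw against the zeroed mm0 to widen bytes to words, two psubw for the outer taps, paddw of the mm7 rounder, psraw by the %4 operand) together with the ff_pw_9 constant below appears to implement VC-1's 4-tap -1/9/9/-1 filter. A minimal scalar sketch under that assumption, with illustrative names not taken from the file (a..d are four source rows one stride apart, rnd is the mm7 rounder, shift is the %4 operand):

    #include <stdint.h>

    /* Hedged scalar sketch of one output sample of the -1/9/9/-1 filter;
     * the full macro body is in the skipped region, so this mirrors only
     * what the visible instructions and ff_pw_9 suggest. */
    static inline int16_t shift2_tap(const uint8_t *a, const uint8_t *b,
                                     const uint8_t *c, const uint8_t *d,
                                     int i, int rnd, int shift)
    {
        return (int16_t)((9 * (b[i] + c[i]) - a[i] - d[i] + rnd) >> shift);
    }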
76 DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL; | 76 DECLARE_ALIGNED(16, const uint64_t, ff_pw_9) = 0x0009000900090009ULL; |
77 | 77 |
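Editor's note: the NEW column replaces the fixed-width DECLARE_ALIGNED_16(type, name) helper with the parameterized DECLARE_ALIGNED(16, type, name) form from libavutil. On GCC-style compilers the macro boils down to an alignment attribute on the declaration; a sketch of the idea (an illustrative stand-in, not the exact libavutil definition, which also covers MSVC/ICC spellings):

    #include <stdint.h>

    /* Illustrative stand-in for an alignment-declaration macro of this shape. */
    #define MY_DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v

    MY_DECLARE_ALIGNED(16, static const uint64_t, pw_9_example) = 0x0009000900090009ULL;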
78 /** Sacrificing mm6 allows pipelining loads from src */ | 78 /** Sacrificing mm6 allows pipelining loads from src */ |
79 static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, | 79 static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, |
80 const uint8_t *src, x86_reg stride, | 80 const uint8_t *src, x86_reg stride, |
81 int rnd, int64_t shift) | 81 int rnd, int64_t shift) |
82 { | 82 { |
83 __asm__ volatile( | 83 __asm__ volatile( |
84 "mov $3, %%"REG_c" \n\t" | 84 "mov $3, %%"REG_c" \n\t" |
85 LOAD_ROUNDER_MMX("%5") | 85 LOAD_ROUNDER_MMX("%5") |
86 "movq "MANGLE(ff_pw_9)", %%mm6 \n\t" | 86 "movq "MANGLE(ff_pw_9)", %%mm6 \n\t" |
(...skipping 348 matching lines...) |
435 __asm__ volatile(\ | 435 __asm__ volatile(\ |
436 "pxor %%mm0, %%mm0 \n\t"\ | 436 "pxor %%mm0, %%mm0 \n\t"\ |
437 ::: "memory"\ | 437 ::: "memory"\ |
438 );\ | 438 );\ |
439 \ | 439 \ |
440 if (vmode) { /* Vertical filter to apply */\ | 440 if (vmode) { /* Vertical filter to apply */\ |
441 if (hmode) { /* Horizontal filter to apply, output to tmp */\ | 441 if (hmode) { /* Horizontal filter to apply, output to tmp */\ |
442 static const int shift_value[] = { 0, 5, 1, 5 };\ | 442 static const int shift_value[] = { 0, 5, 1, 5 };\ |
443 int shift = (shift_value[hmode]+shift_value[vmode])>>1;\ | 443 int shift = (shift_value[hmode]+shift_value[vmode])>>1;\ |
444 int r;\ | 444 int r;\ |
445 DECLARE_ALIGNED_16(int16_t, tmp)[12*8];\ | 445 DECLARE_ALIGNED(16, int16_t, tmp)[12*8];\ |
446 \ | 446 \ |
447 r = (1<<(shift-1)) + rnd-1;\ | 447 r = (1<<(shift-1)) + rnd-1;\ |
448 vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\ | 448 vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\ |
449 \ | 449 \ |
450 vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);\ | 450 vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);\ |
451 return;\ | 451 return;\ |
452 }\ | 452 }\ |
453 else { /* No horizontal filter, output 8 lines to dst */\ | 453 else { /* No horizontal filter, output 8 lines to dst */\ |
454 vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);\ | 454 vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);\ |
455 return;\ | 455 return;\ |
456 }\ | 456 }\ |
457 }\ | 457 }\ |
458 \ | 458 \ |
459 /* Horizontal mode with no vertical mode */\ | 459 /* Horizontal mode with no vertical mode */\ |
460 vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);\ | 460 vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);\ |
461 } | 461 } |
462 | 462 |
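Editor's note: the shift bookkeeping in the two-pass branch above is easy to trip over: shift_value[] maps the filter mode to its per-pass scaling, the combined shift is their average, and r rebuilds the rounding constant for the first (vertical) pass while the horizontal pass gets 64-rnd. A small worked sketch using only the formulas visible in the macro (names are illustrative):

    /* Worked example of the rounding setup in the two-pass path.
     * For hmode = 1, vmode = 2: shift = (5 + 1) >> 1 = 3 and, with rnd = 1,
     * r = (1 << (3 - 1)) + 1 - 1 = 4. */
    static const int shift_value_example[] = { 0, 5, 1, 5 };

    static inline int first_pass_rounder(int hmode, int vmode, int rnd)
    {
        int shift = (shift_value_example[hmode] + shift_value_example[vmode]) >> 1;
        return (1 << (shift - 1)) + rnd - 1;
    }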
463 VC1_MSPEL_MC(put_) | 463 VC1_MSPEL_MC(put_) |
464 VC1_MSPEL_MC(avg_) | 464 VC1_MSPEL_MC(avg_) |
465 | 465 |
466 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); | |
467 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); | |
468 | |
469 /** Macro to ease declaring bicubic filter interpolation functions */ | 466 /** Macro to ease declaring bicubic filter interpolation functions */ |
470 #define DECLARE_FUNCTION(a, b) \ | 467 #define DECLARE_FUNCTION(a, b) \ |
471 static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ | 468 static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ |
472 put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ | 469 put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ |
473 }\ | 470 }\ |
474 static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ | 471 static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ |
475 avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ | 472 avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ |
476 } | 473 } |
477 | 474 |
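Editor's note: DECLARE_FUNCTION(a, b) stamps out one put_ and one avg_ wrapper per subpel position, each forwarding to the put_vc1_mspel_mc/avg_vc1_mspel_mc bodies generated by VC1_MSPEL_MC above. Expanding the first instantiation by hand gives roughly:

    /* Hand expansion of DECLARE_FUNCTION(0, 1); the real expansion is produced
     * by the preprocessor from the macro above. */
    static void put_vc1_mspel_mc01_mmx(uint8_t *dst, const uint8_t *src,
                                       int stride, int rnd) {
        put_vc1_mspel_mc(dst, src, stride, 0, 1, rnd);
    }
    static void avg_vc1_mspel_mc01_mmx2(uint8_t *dst, const uint8_t *src,
                                        int stride, int rnd) {
        avg_vc1_mspel_mc(dst, src, stride, 0, 1, rnd);
    }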
478 DECLARE_FUNCTION(0, 1) | 475 DECLARE_FUNCTION(0, 1) |
(...skipping 256 matching lines...) |
735 dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmx2; | 732 dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmx2; |
736 dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmx2; | 733 dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmx2; |
737 dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmx2; | 734 dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmx2; |
738 | 735 |
739 dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2; | 736 dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2; |
740 dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2; | 737 dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2; |
741 dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2; | 738 dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2; |
742 dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2; | 739 dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2; |
743 } | 740 } |
744 } | 741 } |
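Editor's note: the visible tail of the init function also shows how the dispatch table is indexed: tab[7] = mc31, tab[11] = mc32, tab[15] = mc33, i.e. index = a + 4*b for mc##a##b. A hypothetical helper (not in the file) spelling that out:

    /* Indexing implied by the assignments above:
     * mspel_tab_index(3, 1) == 7, (3, 2) == 11, (3, 3) == 15. */
    static inline int mspel_tab_index(int a, int b)
    {
        return a + 4 * b;
    }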