Index: source/libvpx/vp8/encoder/x86/x86_csystemdependent.c
===================================================================
--- source/libvpx/vp8/encoder/x86/x86_csystemdependent.c	(revision 96967)
+++ source/libvpx/vp8/encoder/x86/x86_csystemdependent.c	(working copy)
@@ -16,7 +16,7 @@
 
 
 #if HAVE_MMX
-static void short_fdct8x4_mmx(short *input, short *output, int pitch)
+void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
 {
     vp8_short_fdct4x4_mmx(input,   output,    pitch);
     vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
@@ -26,7 +26,7 @@
                                  short *qcoeff_ptr, short *dequant_ptr,
                                  short *scan_mask, short *round_ptr,
                                  short *quant_ptr, short *dqcoeff_ptr);
-static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
+void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
 {
     short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
     short *coeff_ptr   = b->coeff;
@@ -51,7 +51,7 @@
 }
 
 int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
+int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
 {
     short *coeff_ptr =  mb->block[0].coeff;
     short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff;
@@ -59,7 +59,7 @@
 }
 
 int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-static int mbuverror_mmx(MACROBLOCK *mb)
+int vp8_mbuverror_mmx(MACROBLOCK *mb)
 {
     short *s_ptr = &mb->coeff[256];
     short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -69,7 +69,7 @@
 void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
                              short *diff, unsigned char *predictor,
                              int pitch);
-static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
+void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
 {
     unsigned char *z = *(be->base_src) + be->src;
     unsigned int  src_stride = be->src_stride;
@@ -81,33 +81,8 @@
 #endif
 
 #if HAVE_SSE2
-int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
-                                 short *qcoeff_ptr, short *dequant_ptr,
-                                 const short *inv_scan_order, short *round_ptr,
-                                 short *quant_ptr, short *dqcoeff_ptr);
-static void fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
-{
-    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
-    short *coeff_ptr   = b->coeff;
-    short *round_ptr   = b->round;
-    short *quant_ptr   = b->quant_fast;
-    short *qcoeff_ptr  = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = d->dequant;
-
-    d->eob = vp8_fast_quantize_b_impl_sse2(
-                 coeff_ptr,
-                 qcoeff_ptr,
-                 dequant_ptr,
-                 vp8_default_inv_zig_zag,
-                 round_ptr,
-                 quant_ptr,
-                 dqcoeff_ptr
-             );
-}
-
 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
+int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
 {
     short *coeff_ptr =  mb->block[0].coeff;
     short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff;
@@ -115,7 +90,7 @@
 }
 
 int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-static int mbuverror_xmm(MACROBLOCK *mb)
+int vp8_mbuverror_xmm(MACROBLOCK *mb)
 {
     short *s_ptr = &mb->coeff[256];
     short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -125,7 +100,7 @@
 void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
                               short *diff, unsigned char *predictor,
                               int pitch);
-static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
+void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
 {
     unsigned char *z = *(be->base_src) + be->src;
     unsigned int  src_stride = be->src_stride;
@@ -137,22 +112,7 @@
 #endif
 
 #if HAVE_SSSE3
-int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
-                                 short *qcoeff_ptr, short *dequant_ptr,
-                                 short *round_ptr,
-                                 short *quant_ptr, short *dqcoeff_ptr);
-static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
-{
-    d->eob = vp8_fast_quantize_b_impl_ssse3(
-                    b->coeff,
-                    d->qcoeff,
-                    d->dequant,
-                    b->round,
-                    b->quant_fast,
-                    d->dqcoeff
-               );
-}
-#if CONFIG_PSNR
+#if CONFIG_INTERNAL_STATS
 #if ARCH_X86_64
 typedef void ssimpf
 (
@@ -178,12 +138,6 @@
 {
 #if CONFIG_RUNTIME_CPU_DETECT
     int flags = x86_simd_caps();
-    int mmx_enabled = flags & HAS_MMX;
-    int xmm_enabled = flags & HAS_SSE;
-    int wmt_enabled = flags & HAS_SSE2;
-    int SSE3Enabled = flags & HAS_SSE3;
-    int SSSE3Enabled = flags & HAS_SSSE3;
-    int SSE4_1Enabled = flags & HAS_SSE4_1;
 
     /* Note:
      *
@@ -194,7 +148,7 @@
 
     /* Override default functions with fastest ones for this CPU. */
 #if HAVE_MMX
-    if (mmx_enabled)
+    if (flags & HAS_MMX)
     {
         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_mmx;
         cpi->rtcd.variance.sad16x8               = vp8_sad16x8_mmx;
@@ -221,37 +175,35 @@
         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_mmx;
         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_mmx;
 
-        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_mmx;
-        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_mmx;
-        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_mmx;
         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;
 
         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.short8x4                  = short_fdct8x4_mmx;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.fast8x4                   = short_fdct8x4_mmx;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;
 
         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx;
-        cpi->rtcd.encodemb.mberr                 = mbblock_error_mmx;
-        cpi->rtcd.encodemb.mbuverr               = mbuverror_mmx;
-        cpi->rtcd.encodemb.subb                  = subtract_b_mmx;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_mmx;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_mmx;
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_mmx;
         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;
 
-        /*cpi->rtcd.quantize.fastquantb            = fast_quantize_b_mmx;*/
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
     }
 #endif
 
 #if HAVE_SSE2
-    if (wmt_enabled)
+    if (flags & HAS_SSE2)
     {
         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_wmt;
         cpi->rtcd.variance.sad16x8               = vp8_sad16x8_wmt;
         cpi->rtcd.variance.sad8x16               = vp8_sad8x16_wmt;
         cpi->rtcd.variance.sad8x8                = vp8_sad8x8_wmt;
         cpi->rtcd.variance.sad4x4                = vp8_sad4x4_wmt;
+        cpi->rtcd.variance.copy32xn              = vp8_copy32xn_sse2;
 
         cpi->rtcd.variance.var4x4                = vp8_variance4x4_wmt;
         cpi->rtcd.variance.var8x8                = vp8_variance8x8_wmt;
@@ -272,11 +224,6 @@
         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_wmt;
         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_sse2;
 
-        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_sse2;
-        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_sse2;
-        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_sse2;
-
-
         /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */;
 
         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2;
@@ -287,14 +234,14 @@
         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
-        cpi->rtcd.encodemb.mberr                 = mbblock_error_xmm;
-        cpi->rtcd.encodemb.mbuverr               = mbuverror_xmm;
-        cpi->rtcd.encodemb.subb                  = subtract_b_sse2;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_sse2;
         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_sse2;
         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_sse2;
 
         cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse2;
-        cpi->rtcd.quantize.fastquantb            = fast_quantize_b_sse2;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
 
 #if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2;
@@ -303,7 +250,7 @@
 #endif
 
 #if HAVE_SSE3
-    if (SSE3Enabled)
+    if (flags & HAS_SSE3)
     {
         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_sse3;
         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_sse3;
@@ -311,20 +258,20 @@
         cpi->rtcd.variance.sad8x16x3             = vp8_sad8x16x3_sse3;
         cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_sse3;
         cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_sse3;
-#if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.search.full_search             = vp8_full_search_sadx3;
-#endif
         cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_sse3;
         cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_sse3;
         cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_sse3;
         cpi->rtcd.variance.sad8x8x4d             = vp8_sad8x8x4d_sse3;
         cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_sse3;
+        cpi->rtcd.variance.copy32xn              = vp8_copy32xn_sse3;
         cpi->rtcd.search.diamond_search          = vp8_diamond_search_sadx4;
+        cpi->rtcd.search.refining_search         = vp8_refining_search_sadx4;
     }
 #endif
 
 #if HAVE_SSSE3
-    if (SSSE3Enabled)
+    if (flags & HAS_SSSE3)
     {
         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3;
         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3;
@@ -332,9 +279,9 @@
         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3;
         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3;
 
-        cpi->rtcd.quantize.fastquantb            = fast_quantize_b_ssse3;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3;
 
-#if CONFIG_PSNR
+#if CONFIG_INTERNAL_STATS
 #if ARCH_X86_64
         cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_sse3;
         cpi->rtcd.variance.ssimpf                = vp8_ssim_parms_16x16_sse3;
@@ -347,16 +294,16 @@
 
 
 #if HAVE_SSE4_1
-    if (SSE4_1Enabled)
+    if (flags & HAS_SSE4_1)
     {
         cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_sse4;
         cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_sse4;
         cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_sse4;
         cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_sse4;
         cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_sse4;
-#if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.search.full_search             = vp8_full_search_sadx8;
-#endif
+
+        cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse4;
     }
 #endif
 