Index: libvpx/source/libvpx/vp8/encoder/x86/x86_csystemdependent.c
diff --git a/libvpx/source/libvpx/vp8/encoder/x86/x86_csystemdependent.c b/libvpx/source/libvpx/vp8/encoder/x86/x86_csystemdependent.c
index badb9f04451b040f08713ca76d0263af16e82bda..2b6bd98eb436110a1bc306040b97b183d92a5057 100644
--- a/libvpx/source/libvpx/vp8/encoder/x86/x86_csystemdependent.c
+++ b/libvpx/source/libvpx/vp8/encoder/x86/x86_csystemdependent.c
@@ -16,7 +16,7 @@
 #if HAVE_MMX
-void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
+static void short_fdct8x4_mmx(short *input, short *output, int pitch)
 {
     vp8_short_fdct4x4_mmx(input, output, pitch);
     vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
@@ -26,7 +26,7 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
                                  short *scan_mask, short *round_ptr,
                                  short *quant_ptr, short *dqcoeff_ptr);
-void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
+static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
 {
     short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
     short *coeff_ptr = b->coeff;
@@ -51,7 +51,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
 }
 int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
+static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
 {
     short *coeff_ptr = mb->block[0].coeff;
     short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
@@ -59,7 +59,7 @@ int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
 }
 int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-int vp8_mbuverror_mmx(MACROBLOCK *mb)
+static int mbuverror_mmx(MACROBLOCK *mb)
 {
     short *s_ptr = &mb->coeff[256];
     short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -69,7 +69,7 @@ int vp8_mbuverror_mmx(MACROBLOCK *mb)
 void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
                              short *diff, unsigned char *predictor,
                              int pitch);
-void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
+static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
 {
     unsigned char *z = *(be->base_src) + be->src;
     unsigned int src_stride = be->src_stride;
@@ -81,8 +81,33 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
 #endif
 #if HAVE_SSE2
+int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
+                                  short *qcoeff_ptr, short *dequant_ptr,
+                                  const short *inv_scan_order, short *round_ptr,
+                                  short *quant_ptr, short *dqcoeff_ptr);
+static void fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+    short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
+    short *coeff_ptr = b->coeff;
+    short *round_ptr = b->round;
+    short *quant_ptr = b->quant_fast;
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = d->dequant;
+
+    d->eob = vp8_fast_quantize_b_impl_sse2(
+        coeff_ptr,
+        qcoeff_ptr,
+        dequant_ptr,
+        vp8_default_inv_zig_zag,
+        round_ptr,
+        quant_ptr,
+        dqcoeff_ptr
+    );
+}
+
 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
+static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
 {
     short *coeff_ptr = mb->block[0].coeff;
     short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
@@ -90,7 +115,7 @@ int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
 }
 int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-int vp8_mbuverror_xmm(MACROBLOCK *mb)
+static int mbuverror_xmm(MACROBLOCK *mb)
 {
     short *s_ptr = &mb->coeff[256];
     short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -100,7 +125,7 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb)
 void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
                               short *diff, unsigned char *predictor,
                               int pitch);
-void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
+static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
 {
     unsigned char *z = *(be->base_src) + be->src;
     unsigned int src_stride = be->src_stride;
@@ -112,7 +137,22 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
 #endif
 #if HAVE_SSSE3
-#if CONFIG_INTERNAL_STATS
+int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
+                                   short *qcoeff_ptr, short *dequant_ptr,
+                                   short *round_ptr,
+                                   short *quant_ptr, short *dqcoeff_ptr);
+static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
+{
+    d->eob = vp8_fast_quantize_b_impl_ssse3(
+        b->coeff,
+        d->qcoeff,
+        d->dequant,
+        b->round,
+        b->quant_fast,
+        d->dqcoeff
+    );
+}
+#if CONFIG_PSNR
 #if ARCH_X86_64
 typedef void ssimpf
 (
@@ -138,6 +178,12 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
     int flags = x86_simd_caps();
+    int mmx_enabled = flags & HAS_MMX;
+    int xmm_enabled = flags & HAS_SSE;
+    int wmt_enabled = flags & HAS_SSE2;
+    int SSE3Enabled = flags & HAS_SSE3;
+    int SSSE3Enabled = flags & HAS_SSSE3;
+    int SSE4_1Enabled = flags & HAS_SSE4_1;
     /* Note:
      *
@@ -148,7 +194,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
     /* Override default functions with fastest ones for this CPU. */
 #if HAVE_MMX
-    if (flags & HAS_MMX)
+    if (mmx_enabled)
     {
         cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx;
         cpi->rtcd.variance.sad16x8 = vp8_sad16x8_mmx;
@@ -175,35 +221,37 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;
         cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx;
+        cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_mmx;
+        cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx;
+        cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx;
         cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
         cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
+        cpi->rtcd.fdct.short8x4 = short_fdct8x4_mmx;
         cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
+        cpi->rtcd.fdct.fast8x4 = short_fdct8x4_mmx;
         cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
         cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
-        cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx;
-        cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx;
-        cpi->rtcd.encodemb.subb = vp8_subtract_b_mmx;
+        cpi->rtcd.encodemb.mberr = mbblock_error_mmx;
+        cpi->rtcd.encodemb.mbuverr = mbuverror_mmx;
+        cpi->rtcd.encodemb.subb = subtract_b_mmx;
         cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx;
         cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx;
-        /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
+        /*cpi->rtcd.quantize.fastquantb = fast_quantize_b_mmx;*/
     }
 #endif
 #if HAVE_SSE2
-    if (flags & HAS_SSE2)
+    if (wmt_enabled)
     {
         cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt;
         cpi->rtcd.variance.sad16x8 = vp8_sad16x8_wmt;
         cpi->rtcd.variance.sad8x16 = vp8_sad8x16_wmt;
         cpi->rtcd.variance.sad8x8 = vp8_sad8x8_wmt;
         cpi->rtcd.variance.sad4x4 = vp8_sad4x4_wmt;
-        cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse2;
         cpi->rtcd.variance.var4x4 = vp8_variance4x4_wmt;
         cpi->rtcd.variance.var8x8 = vp8_variance8x8_wmt;
@@ -224,6 +272,11 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;
         cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2;
+        cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_sse2;
+        cpi->rtcd.variance.get8x8var = vp8_get8x8var_sse2;
+        cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2;
+
+
         /* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */;
         cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2;
@@ -234,14 +287,14 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ;
         cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
-        cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
-        cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
-        cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2;
+        cpi->rtcd.encodemb.mberr = mbblock_error_xmm;
+        cpi->rtcd.encodemb.mbuverr = mbuverror_xmm;
+        cpi->rtcd.encodemb.subb = subtract_b_sse2;
         cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2;
         cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;
         cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;
-        cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
+        cpi->rtcd.quantize.fastquantb = fast_quantize_b_sse2;
 #if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
@@ -250,7 +303,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 #endif
 #if HAVE_SSE3
-    if (flags & HAS_SSE3)
+    if (SSE3Enabled)
     {
         cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3;
         cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_sse3;
@@ -258,20 +311,20 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3;
         cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3;
         cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3;
+#if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.search.full_search = vp8_full_search_sadx3;
+#endif
         cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3;
         cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3;
         cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3;
         cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3;
         cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3;
-        cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse3;
         cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4;
-        cpi->rtcd.search.refining_search = vp8_refining_search_sadx4;
     }
 #endif
 #if HAVE_SSSE3
-    if (flags & HAS_SSSE3)
+    if (SSSE3Enabled)
     {
         cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
         cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
@@ -279,9 +332,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
         cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
-        cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
+        cpi->rtcd.quantize.fastquantb = fast_quantize_b_ssse3;
-#if CONFIG_INTERNAL_STATS
+#if CONFIG_PSNR
 #if ARCH_X86_64
         cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3;
         cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3;
@@ -294,16 +347,16 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 #if HAVE_SSE4_1
-    if (flags & HAS_SSE4_1)
+    if (SSE4_1Enabled)
     {
         cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
         cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4;
         cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
         cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
         cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
+#if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.search.full_search = vp8_full_search_sadx8;
-
-        cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse4;
+#endif
     }
 #endif