| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 | 11 |
| 12 #include "vpx_ports/config.h" | 12 #include "vpx_ports/config.h" |
| 13 #include "vpx_ports/x86.h" | 13 #include "vpx_ports/x86.h" |
| 14 #include "vp8/encoder/variance.h" | 14 #include "vp8/encoder/variance.h" |
| 15 #include "vp8/encoder/onyx_int.h" | 15 #include "vp8/encoder/onyx_int.h" |
| 16 | 16 |
| 17 | 17 |
| 18 #if HAVE_MMX | 18 #if HAVE_MMX |
| 19 static void short_fdct8x4_mmx(short *input, short *output, int pitch) | 19 void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch) |
| 20 { | 20 { |
| 21 vp8_short_fdct4x4_mmx(input, output, pitch); | 21 vp8_short_fdct4x4_mmx(input, output, pitch); |
| 22 vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); | 22 vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); |
| 23 } | 23 } |
| 24 | 24 |
| 25 int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, | 25 int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, |
| 26 short *qcoeff_ptr, short *dequant_ptr, | 26 short *qcoeff_ptr, short *dequant_ptr, |
| 27 short *scan_mask, short *round_ptr, | 27 short *scan_mask, short *round_ptr, |
| 28 short *quant_ptr, short *dqcoeff_ptr); | 28 short *quant_ptr, short *dqcoeff_ptr); |
| 29 static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) | 29 void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) |
| 30 { | 30 { |
| 31 short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; | 31 short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; |
| 32 short *coeff_ptr = b->coeff; | 32 short *coeff_ptr = b->coeff; |
| 33 short *zbin_ptr = b->zbin; | 33 short *zbin_ptr = b->zbin; |
| 34 short *round_ptr = b->round; | 34 short *round_ptr = b->round; |
| 35 short *quant_ptr = b->quant_fast; | 35 short *quant_ptr = b->quant_fast; |
| 36 short *qcoeff_ptr = d->qcoeff; | 36 short *qcoeff_ptr = d->qcoeff; |
| 37 short *dqcoeff_ptr = d->dqcoeff; | 37 short *dqcoeff_ptr = d->dqcoeff; |
| 38 short *dequant_ptr = d->dequant; | 38 short *dequant_ptr = d->dequant; |
| 39 | 39 |
| 40 d->eob = vp8_fast_quantize_b_impl_mmx( | 40 d->eob = vp8_fast_quantize_b_impl_mmx( |
| 41 coeff_ptr, | 41 coeff_ptr, |
| 42 zbin_ptr, | 42 zbin_ptr, |
| 43 qcoeff_ptr, | 43 qcoeff_ptr, |
| 44 dequant_ptr, | 44 dequant_ptr, |
| 45 scan_mask, | 45 scan_mask, |
| 46 | 46 |
| 47 round_ptr, | 47 round_ptr, |
| 48 quant_ptr, | 48 quant_ptr, |
| 49 dqcoeff_ptr | 49 dqcoeff_ptr |
| 50 ); | 50 ); |
| 51 } | 51 } |
| 52 | 52 |
| 53 int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 53 int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); |
| 54 static int mbblock_error_mmx(MACROBLOCK *mb, int dc) | 54 int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc) |
| 55 { | 55 { |
| 56 short *coeff_ptr = mb->block[0].coeff; | 56 short *coeff_ptr = mb->block[0].coeff; |
| 57 short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; | 57 short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; |
| 58 return vp8_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc); | 58 return vp8_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc); |
| 59 } | 59 } |
| 60 | 60 |
| 61 int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); | 61 int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); |
| 62 static int mbuverror_mmx(MACROBLOCK *mb) | 62 int vp8_mbuverror_mmx(MACROBLOCK *mb) |
| 63 { | 63 { |
| 64 short *s_ptr = &mb->coeff[256]; | 64 short *s_ptr = &mb->coeff[256]; |
| 65 short *d_ptr = &mb->e_mbd.dqcoeff[256]; | 65 short *d_ptr = &mb->e_mbd.dqcoeff[256]; |
| 66 return vp8_mbuverror_mmx_impl(s_ptr, d_ptr); | 66 return vp8_mbuverror_mmx_impl(s_ptr, d_ptr); |
| 67 } | 67 } |
| 68 | 68 |
| 69 void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, | 69 void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, |
| 70 short *diff, unsigned char *predictor, | 70 short *diff, unsigned char *predictor, |
| 71 int pitch); | 71 int pitch); |
| 72 static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) | 72 void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) |
| 73 { | 73 { |
| 74 unsigned char *z = *(be->base_src) + be->src; | 74 unsigned char *z = *(be->base_src) + be->src; |
| 75 unsigned int src_stride = be->src_stride; | 75 unsigned int src_stride = be->src_stride; |
| 76 short *diff = &be->src_diff[0]; | 76 short *diff = &be->src_diff[0]; |
| 77 unsigned char *predictor = &bd->predictor[0]; | 77 unsigned char *predictor = &bd->predictor[0]; |
| 78 vp8_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); | 78 vp8_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); |
| 79 } | 79 } |
| 80 | 80 |
| 81 #endif | 81 #endif |
| 82 | 82 |
| 83 #if HAVE_SSE2 | 83 #if HAVE_SSE2 |
| 84 int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, | |
| 85 short *qcoeff_ptr, short *dequant_ptr, | |
| 86 const short *inv_scan_order, short *round_ptr, | |
| 87 short *quant_ptr, short *dqcoeff_ptr); | |
| 88 static void fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) | |
| 89 { | |
| 90 short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; | |
| 91 short *coeff_ptr = b->coeff; | |
| 92 short *round_ptr = b->round; | |
| 93 short *quant_ptr = b->quant_fast; | |
| 94 short *qcoeff_ptr = d->qcoeff; | |
| 95 short *dqcoeff_ptr = d->dqcoeff; | |
| 96 short *dequant_ptr = d->dequant; | |
| 97 | |
| 98 d->eob = vp8_fast_quantize_b_impl_sse2( | |
| 99 coeff_ptr, | |
| 100 qcoeff_ptr, | |
| 101 dequant_ptr, | |
| 102 vp8_default_inv_zig_zag, | |
| 103 round_ptr, | |
| 104 quant_ptr, | |
| 105 dqcoeff_ptr | |
| 106 ); | |
| 107 } | |
| 108 | |
| 109 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 84 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); |
| 110 static int mbblock_error_xmm(MACROBLOCK *mb, int dc) | 85 int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc) |
| 111 { | 86 { |
| 112 short *coeff_ptr = mb->block[0].coeff; | 87 short *coeff_ptr = mb->block[0].coeff; |
| 113 short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; | 88 short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; |
| 114 return vp8_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc); | 89 return vp8_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc); |
| 115 } | 90 } |
| 116 | 91 |
| 117 int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); | 92 int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); |
| 118 static int mbuverror_xmm(MACROBLOCK *mb) | 93 int vp8_mbuverror_xmm(MACROBLOCK *mb) |
| 119 { | 94 { |
| 120 short *s_ptr = &mb->coeff[256]; | 95 short *s_ptr = &mb->coeff[256]; |
| 121 short *d_ptr = &mb->e_mbd.dqcoeff[256]; | 96 short *d_ptr = &mb->e_mbd.dqcoeff[256]; |
| 122 return vp8_mbuverror_xmm_impl(s_ptr, d_ptr); | 97 return vp8_mbuverror_xmm_impl(s_ptr, d_ptr); |
| 123 } | 98 } |
| 124 | 99 |
| 125 void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, | 100 void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, |
| 126 short *diff, unsigned char *predictor, | 101 short *diff, unsigned char *predictor, |
| 127 int pitch); | 102 int pitch); |
| 128 static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) | 103 void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) |
| 129 { | 104 { |
| 130 unsigned char *z = *(be->base_src) + be->src; | 105 unsigned char *z = *(be->base_src) + be->src; |
| 131 unsigned int src_stride = be->src_stride; | 106 unsigned int src_stride = be->src_stride; |
| 132 short *diff = &be->src_diff[0]; | 107 short *diff = &be->src_diff[0]; |
| 133 unsigned char *predictor = &bd->predictor[0]; | 108 unsigned char *predictor = &bd->predictor[0]; |
| 134 vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); | 109 vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); |
| 135 } | 110 } |
| 136 | 111 |
| 137 #endif | 112 #endif |
| 138 | 113 |
| 139 #if HAVE_SSSE3 | 114 #if HAVE_SSSE3 |
| 140 int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr, | 115 #if CONFIG_INTERNAL_STATS |
| 141 short *qcoeff_ptr, short *dequant_ptr, | |
| 142 short *round_ptr, | |
| 143 short *quant_ptr, short *dqcoeff_ptr); | |
| 144 static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) | |
| 145 { | |
| 146 d->eob = vp8_fast_quantize_b_impl_ssse3( | |
| 147 b->coeff, | |
| 148 d->qcoeff, | |
| 149 d->dequant, | |
| 150 b->round, | |
| 151 b->quant_fast, | |
| 152 d->dqcoeff | |
| 153 ); | |
| 154 } | |
| 155 #if CONFIG_PSNR | |
| 156 #if ARCH_X86_64 | 116 #if ARCH_X86_64 |
| 157 typedef void ssimpf | 117 typedef void ssimpf |
| 158 ( | 118 ( |
| 159 unsigned char *s, | 119 unsigned char *s, |
| 160 int sp, | 120 int sp, |
| 161 unsigned char *r, | 121 unsigned char *r, |
| 162 int rp, | 122 int rp, |
| 163 unsigned long *sum_s, | 123 unsigned long *sum_s, |
| 164 unsigned long *sum_r, | 124 unsigned long *sum_r, |
| 165 unsigned long *sum_sq_s, | 125 unsigned long *sum_sq_s, |
| 166 unsigned long *sum_sq_r, | 126 unsigned long *sum_sq_r, |
| 167 unsigned long *sum_sxr | 127 unsigned long *sum_sxr |
| 168 ); | 128 ); |
| 169 | 129 |
| 170 extern ssimpf vp8_ssim_parms_16x16_sse3; | 130 extern ssimpf vp8_ssim_parms_16x16_sse3; |
| 171 extern ssimpf vp8_ssim_parms_8x8_sse3; | 131 extern ssimpf vp8_ssim_parms_8x8_sse3; |
| 172 #endif | 132 #endif |
| 173 #endif | 133 #endif |
| 174 #endif | 134 #endif |
| 175 | 135 |
| 176 | 136 |
| 177 void vp8_arch_x86_encoder_init(VP8_COMP *cpi) | 137 void vp8_arch_x86_encoder_init(VP8_COMP *cpi) |
| 178 { | 138 { |
| 179 #if CONFIG_RUNTIME_CPU_DETECT | 139 #if CONFIG_RUNTIME_CPU_DETECT |
| 180 int flags = x86_simd_caps(); | 140 int flags = x86_simd_caps(); |
| 181 int mmx_enabled = flags & HAS_MMX; | |
| 182 int xmm_enabled = flags & HAS_SSE; | |
| 183 int wmt_enabled = flags & HAS_SSE2; | |
| 184 int SSE3Enabled = flags & HAS_SSE3; | |
| 185 int SSSE3Enabled = flags & HAS_SSSE3; | |
| 186 int SSE4_1Enabled = flags & HAS_SSE4_1; | |
| 187 | 141 |
| 188 /* Note: | 142 /* Note: |
| 189 * | 143 * |
| 190 * This platform can be built without runtime CPU detection as well. If | 144 * This platform can be built without runtime CPU detection as well. If |
| 191 * you modify any of the function mappings present in this file, be sure | 145 * you modify any of the function mappings present in this file, be sure |
| 192 * to also update them in static mapings (<arch>/filename_<arch>.h) | 146 * to also update them in static mapings (<arch>/filename_<arch>.h) |
| 193 */ | 147 */ |
| 194 | 148 |
| 195 /* Override default functions with fastest ones for this CPU. */ | 149 /* Override default functions with fastest ones for this CPU. */ |
| 196 #if HAVE_MMX | 150 #if HAVE_MMX |
| 197 if (mmx_enabled) | 151 if (flags & HAS_MMX) |
| 198 { | 152 { |
| 199 cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx; | 153 cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx; |
| 200 cpi->rtcd.variance.sad16x8 = vp8_sad16x8_mmx; | 154 cpi->rtcd.variance.sad16x8 = vp8_sad16x8_mmx; |
| 201 cpi->rtcd.variance.sad8x16 = vp8_sad8x16_mmx; | 155 cpi->rtcd.variance.sad8x16 = vp8_sad8x16_mmx; |
| 202 cpi->rtcd.variance.sad8x8 = vp8_sad8x8_mmx; | 156 cpi->rtcd.variance.sad8x8 = vp8_sad8x8_mmx; |
| 203 cpi->rtcd.variance.sad4x4 = vp8_sad4x4_mmx; | 157 cpi->rtcd.variance.sad4x4 = vp8_sad4x4_mmx; |
| 204 | 158 |
| 205 cpi->rtcd.variance.var4x4 = vp8_variance4x4_mmx; | 159 cpi->rtcd.variance.var4x4 = vp8_variance4x4_mmx; |
| 206 cpi->rtcd.variance.var8x8 = vp8_variance8x8_mmx; | 160 cpi->rtcd.variance.var8x8 = vp8_variance8x8_mmx; |
| 207 cpi->rtcd.variance.var8x16 = vp8_variance8x16_mmx; | 161 cpi->rtcd.variance.var8x16 = vp8_variance8x16_mmx; |
| 208 cpi->rtcd.variance.var16x8 = vp8_variance16x8_mmx; | 162 cpi->rtcd.variance.var16x8 = vp8_variance16x8_mmx; |
| 209 cpi->rtcd.variance.var16x16 = vp8_variance16x16_mmx; | 163 cpi->rtcd.variance.var16x16 = vp8_variance16x16_mmx; |
| 210 | 164 |
| 211 cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_mmx; | 165 cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_mmx; |
| 212 cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_mmx; | 166 cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_mmx; |
| 213 cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx; | 167 cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx; |
| 214 cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx; | 168 cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx; |
| 215 cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx; | 169 cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx; |
| 216 cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; | 170 cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; |
| 217 cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; | 171 cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; |
| 218 cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; | 172 cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; |
| 219 cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx; | 173 cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx; |
| 220 | 174 |
| 221 cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx; | 175 cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx; |
| 222 cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx; | 176 cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx; |
| 223 | 177 |
| 224 cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_mmx; | |
| 225 cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx; | |
| 226 cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx; | |
| 227 cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx; | 178 cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx; |
| 228 | 179 |
| 229 cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx; | 180 cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx; |
| 230 cpi->rtcd.fdct.short8x4 = short_fdct8x4_mmx; | 181 cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx; |
| 231 cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx; | 182 cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx; |
| 232 cpi->rtcd.fdct.fast8x4 = short_fdct8x4_mmx; | 183 cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx; |
| 233 | 184 |
| 234 cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; | 185 cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; |
| 235 | 186 |
| 236 cpi->rtcd.encodemb.berr = vp8_block_error_mmx; | 187 cpi->rtcd.encodemb.berr = vp8_block_error_mmx; |
| 237 cpi->rtcd.encodemb.mberr = mbblock_error_mmx; | 188 cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx; |
| 238 cpi->rtcd.encodemb.mbuverr = mbuverror_mmx; | 189 cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx; |
| 239 cpi->rtcd.encodemb.subb = subtract_b_mmx; | 190 cpi->rtcd.encodemb.subb = vp8_subtract_b_mmx; |
| 240 cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx; | 191 cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx; |
| 241 cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx; | 192 cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx; |
| 242 | 193 |
| 243 /*cpi->rtcd.quantize.fastquantb = fast_quantize_b_mmx;*/ | 194 /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/ |
| 244 } | 195 } |
| 245 #endif | 196 #endif |
| 246 | 197 |
| 247 #if HAVE_SSE2 | 198 #if HAVE_SSE2 |
| 248 if (wmt_enabled) | 199 if (flags & HAS_SSE2) |
| 249 { | 200 { |
| 250 cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt; | 201 cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt; |
| 251 cpi->rtcd.variance.sad16x8 = vp8_sad16x8_wmt; | 202 cpi->rtcd.variance.sad16x8 = vp8_sad16x8_wmt; |
| 252 cpi->rtcd.variance.sad8x16 = vp8_sad8x16_wmt; | 203 cpi->rtcd.variance.sad8x16 = vp8_sad8x16_wmt; |
| 253 cpi->rtcd.variance.sad8x8 = vp8_sad8x8_wmt; | 204 cpi->rtcd.variance.sad8x8 = vp8_sad8x8_wmt; |
| 254 cpi->rtcd.variance.sad4x4 = vp8_sad4x4_wmt; | 205 cpi->rtcd.variance.sad4x4 = vp8_sad4x4_wmt; |
| | 206 cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse2; |
| 255 | 207 |
| 256 cpi->rtcd.variance.var4x4 = vp8_variance4x4_wmt; | 208 cpi->rtcd.variance.var4x4 = vp8_variance4x4_wmt; |
| 257 cpi->rtcd.variance.var8x8 = vp8_variance8x8_wmt; | 209 cpi->rtcd.variance.var8x8 = vp8_variance8x8_wmt; |
| 258 cpi->rtcd.variance.var8x16 = vp8_variance8x16_wmt; | 210 cpi->rtcd.variance.var8x16 = vp8_variance8x16_wmt; |
| 259 cpi->rtcd.variance.var16x8 = vp8_variance16x8_wmt; | 211 cpi->rtcd.variance.var16x8 = vp8_variance16x8_wmt; |
| 260 cpi->rtcd.variance.var16x16 = vp8_variance16x16_wmt; | 212 cpi->rtcd.variance.var16x16 = vp8_variance16x16_wmt; |
| 261 | 213 |
| 262 cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_wmt; | 214 cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_wmt; |
| 263 cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_wmt; | 215 cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_wmt; |
| 264 cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt; | 216 cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt; |
| 265 cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt; | 217 cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt; |
| 266 cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt; | 218 cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt; |
| 267 cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; | 219 cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; |
| 268 cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; | 220 cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; |
| 269 cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; | 221 cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; |
| 270 cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt; | 222 cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt; |
| 271 | 223 |
| 272 cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt; | 224 cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt; |
| 273 cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2; | 225 cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2; |
| 274 | 226 |
| 275 cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_sse2; | |
| 276 cpi->rtcd.variance.get8x8var = vp8_get8x8var_sse2; | |
| 277 cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2; | |
| 278 | |
| 279 | |
| 280 /* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */; | 227 /* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */; |
| 281 | 228 |
| 282 cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2; | 229 cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2; |
| 283 cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_sse2; | 230 cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_sse2; |
| 284 cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2; | 231 cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2; |
| 285 cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2; | 232 cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2; |
| 286 | 233 |
| 287 cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ; | 234 cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ; |
| 288 | 235 |
| 289 cpi->rtcd.encodemb.berr = vp8_block_error_xmm; | 236 cpi->rtcd.encodemb.berr = vp8_block_error_xmm; |
| 290 cpi->rtcd.encodemb.mberr = mbblock_error_xmm; | 237 cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm; |
| 291 cpi->rtcd.encodemb.mbuverr = mbuverror_xmm; | 238 cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm; |
| 292 cpi->rtcd.encodemb.subb = subtract_b_sse2; | 239 cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2; |
| 293 cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2; | 240 cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2; |
| 294 cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; | 241 cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; |
| 295 | 242 |
| 296 cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2; | 243 cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2; |
| 297 cpi->rtcd.quantize.fastquantb = fast_quantize_b_sse2; | 244 cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2; |
| 298 | 245 |
| 299 #if !(CONFIG_REALTIME_ONLY) | 246 #if !(CONFIG_REALTIME_ONLY) |
| 300 cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; | 247 cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; |
| 301 #endif | 248 #endif |
| 302 } | 249 } |
| 303 #endif | 250 #endif |
| 304 | 251 |
| 305 #if HAVE_SSE3 | 252 #if HAVE_SSE3 |
| 306 if (SSE3Enabled) | 253 if (flags & HAS_SSE3) |
| 307 { | 254 { |
| 308 cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3; | 255 cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3; |
| 309 cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_sse3; | 256 cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_sse3; |
| 310 cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_sse3; | 257 cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_sse3; |
| 311 cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3; | 258 cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3; |
| 312 cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3; | 259 cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3; |
| 313 cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3; | 260 cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3; |
| 314 #if !(CONFIG_REALTIME_ONLY) | |
| 315 cpi->rtcd.search.full_search = vp8_full_search_sadx3; | 261 cpi->rtcd.search.full_search = vp8_full_search_sadx3; |
| 316 #endif | |
| 317 cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3; | 262 cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3; |
| 318 cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3; | 263 cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3; |
| 319 cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3; | 264 cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3; |
| 320 cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3; | 265 cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3; |
| 321 cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3; | 266 cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3; |
| | 267 cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse3; |
| 322 cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4; | 268 cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4; |
| | 269 cpi->rtcd.search.refining_search = vp8_refining_search_sadx4; |
| 323 } | 270 } |
| 324 #endif | 271 #endif |
| 325 | 272 |
| 326 #if HAVE_SSSE3 | 273 #if HAVE_SSSE3 |
| 327 if (SSSE3Enabled) | 274 if (flags & HAS_SSSE3) |
| 328 { | 275 { |
| 329 cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3; | 276 cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3; |
| 330 cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3; | 277 cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3; |
| 331 | 278 |
| 332 cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3; | 279 cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3; |
| 333 cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3; | 280 cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3; |
| 334 | 281 |
| 335 cpi->rtcd.quantize.fastquantb = fast_quantize_b_ssse3; | 282 cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3; |
| 336 | 283 |
| 337 #if CONFIG_PSNR | 284 #if CONFIG_INTERNAL_STATS |
| 338 #if ARCH_X86_64 | 285 #if ARCH_X86_64 |
| 339 cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3; | 286 cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3; |
| 340 cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3; | 287 cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3; |
| 341 #endif | 288 #endif |
| 342 #endif | 289 #endif |
| 343 | 290 |
| 344 } | 291 } |
| 345 #endif | 292 #endif |
| 346 | 293 |
| 347 | 294 |
| 348 | 295 |
| 349 #if HAVE_SSE4_1 | 296 #if HAVE_SSE4_1 |
| 350 if (SSE4_1Enabled) | 297 if (flags & HAS_SSE4_1) |
| 351 { | 298 { |
| 352 cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4; | 299 cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4; |
| 353 cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4; | 300 cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4; |
| 354 cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; | 301 cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; |
| 355 cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; | 302 cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; |
| 356 cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; | 303 cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; |
| 357 #if !(CONFIG_REALTIME_ONLY) | |
| 358 cpi->rtcd.search.full_search = vp8_full_search_sadx8; | 304 cpi->rtcd.search.full_search = vp8_full_search_sadx8; |
| 359 #endif | 305 |
| | 306 cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse4; |
| 360 } | 307 } |
| 361 #endif | 308 #endif |
| 362 | 309 |
| 363 #endif | 310 #endif |
| 364 } | 311 } |
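For context, the NEW side of `vp8_arch_x86_encoder_init()` drops the `mmx_enabled`/`wmt_enabled`/`SSE3Enabled`/`SSSE3Enabled`/`SSE4_1Enabled` locals and tests `x86_simd_caps()` bits directly (`flags & HAS_MMX`, `flags & HAS_SSE2`, ...) when filling the RTCD function-pointer table. The standalone C sketch below illustrates that runtime-dispatch pattern only; `rtcd_t`, `sad16x16_c`/`sad16x16_sse2`, and `encoder_rtcd_init` are hypothetical stand-ins, not libvpx APIs, and `__builtin_cpu_supports()` (GCC/Clang) stands in for `x86_simd_caps()`.

```c
/*
 * Standalone sketch of the runtime-dispatch pattern above; NOT libvpx code.
 * All names here are hypothetical stand-ins for the cpi->rtcd.* entries.
 */
#include <stdio.h>

typedef unsigned int (*sad_fn)(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride);

/* Portable C fallback: 16x16 sum of absolute differences. */
static unsigned int sad16x16_c(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride)
{
    unsigned int sad = 0;
    int r, c;

    for (r = 0; r < 16; r++, src += src_stride, ref += ref_stride)
        for (c = 0; c < 16; c++)
            sad += (src[c] > ref[c]) ? src[c] - ref[c] : ref[c] - src[c];

    return sad;
}

/* Stand-in for an SSE2 version; a real one would use intrinsics or asm. */
static unsigned int sad16x16_sse2(const unsigned char *src, int src_stride,
                                  const unsigned char *ref, int ref_stride)
{
    return sad16x16_c(src, src_stride, ref, ref_stride);
}

typedef struct { sad_fn sad16x16; } rtcd_t;

/* Same shape as vp8_arch_x86_encoder_init(): start from the C defaults,
 * then override entries only when the CPU reports the matching feature. */
static void encoder_rtcd_init(rtcd_t *rtcd)
{
    __builtin_cpu_init();            /* prime GCC/Clang CPU feature probes */

    rtcd->sad16x16 = sad16x16_c;     /* always-valid default */

    if (__builtin_cpu_supports("sse2"))
        rtcd->sad16x16 = sad16x16_sse2;
}

int main(void)
{
    static unsigned char src[16 * 16], ref[16 * 16];
    rtcd_t rtcd;

    encoder_rtcd_init(&rtcd);
    printf("SAD = %u\n", rtcd.sad16x16(src, 16, ref, 16));
    return 0;
}
```

Seeding the table with the C fallback before any feature check keeps every entry valid even when no SIMD extension is detected, which is the same reason the code above only overrides entries inside each `if (flags & HAS_*)` block.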