| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3  * | 3  * | 
| 4  *  Use of this source code is governed by a BSD-style license | 4  *  Use of this source code is governed by a BSD-style license | 
| 5  *  that can be found in the LICENSE file in the root of the source | 5  *  that can be found in the LICENSE file in the root of the source | 
| 6  *  tree. An additional intellectual property rights grant can be found | 6  *  tree. An additional intellectual property rights grant can be found | 
| 7  *  in the file PATENTS.  All contributing project authors may | 7  *  in the file PATENTS.  All contributing project authors may | 
| 8  *  be found in the AUTHORS file in the root of the source tree. | 8  *  be found in the AUTHORS file in the root of the source tree. | 
| 9  */ | 9  */ | 
| 10 | 10 | 
| 11 | 11 | 
| 12 #include "vpx_ports/config.h" | 12 #include "vpx_ports/config.h" | 
| 13 #include "vpx_ports/x86.h" | 13 #include "vpx_ports/x86.h" | 
| 14 #include "vp8/encoder/variance.h" | 14 #include "vp8/encoder/variance.h" | 
| 15 #include "vp8/encoder/onyx_int.h" | 15 #include "vp8/encoder/onyx_int.h" | 
| 16 | 16 | 
| 17 | 17 | 
| 18 #if HAVE_MMX | 18 #if HAVE_MMX | 
| 19 static void short_fdct8x4_mmx(short *input, short *output, int pitch) | 19 void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch) | 
| 20 { | 20 { | 
| 21     vp8_short_fdct4x4_mmx(input,   output,    pitch); | 21     vp8_short_fdct4x4_mmx(input,   output,    pitch); | 
| 22     vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); | 22     vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); | 
| 23 } | 23 } | 
| 24 | 24 | 
| 25 int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, | 25 int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, | 
| 26                                  short *qcoeff_ptr, short *dequant_ptr, | 26                                  short *qcoeff_ptr, short *dequant_ptr, | 
| 27                                  short *scan_mask, short *round_ptr, | 27                                  short *scan_mask, short *round_ptr, | 
| 28                                  short *quant_ptr, short *dqcoeff_ptr); | 28                                  short *quant_ptr, short *dqcoeff_ptr); | 
| 29 static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) | 29 void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) | 
| 30 { | 30 { | 
| 31     short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; | 31     short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; | 
| 32     short *coeff_ptr   = b->coeff; | 32     short *coeff_ptr   = b->coeff; | 
| 33     short *zbin_ptr    = b->zbin; | 33     short *zbin_ptr    = b->zbin; | 
| 34     short *round_ptr   = b->round; | 34     short *round_ptr   = b->round; | 
| 35     short *quant_ptr   = b->quant_fast; | 35     short *quant_ptr   = b->quant_fast; | 
| 36     short *qcoeff_ptr  = d->qcoeff; | 36     short *qcoeff_ptr  = d->qcoeff; | 
| 37     short *dqcoeff_ptr = d->dqcoeff; | 37     short *dqcoeff_ptr = d->dqcoeff; | 
| 38     short *dequant_ptr = d->dequant; | 38     short *dequant_ptr = d->dequant; | 
| 39 | 39 | 
| 40     d->eob = vp8_fast_quantize_b_impl_mmx( | 40     d->eob = vp8_fast_quantize_b_impl_mmx( | 
| 41                  coeff_ptr, | 41                  coeff_ptr, | 
| 42                  zbin_ptr, | 42                  zbin_ptr, | 
| 43                  qcoeff_ptr, | 43                  qcoeff_ptr, | 
| 44                  dequant_ptr, | 44                  dequant_ptr, | 
| 45                  scan_mask, | 45                  scan_mask, | 
| 46 | 46 | 
| 47                  round_ptr, | 47                  round_ptr, | 
| 48                  quant_ptr, | 48                  quant_ptr, | 
| 49                  dqcoeff_ptr | 49                  dqcoeff_ptr | 
| 50              ); | 50              ); | 
| 51 } | 51 } | 
| 52 | 52 | 
| 53 int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 53 int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 
| 54 static int mbblock_error_mmx(MACROBLOCK *mb, int dc) | 54 int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc) | 
| 55 { | 55 { | 
| 56     short *coeff_ptr =  mb->block[0].coeff; | 56     short *coeff_ptr =  mb->block[0].coeff; | 
| 57     short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff; | 57     short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff; | 
| 58     return vp8_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc); | 58     return vp8_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc); | 
| 59 } | 59 } | 
| 60 | 60 | 
| 61 int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); | 61 int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); | 
| 62 static int mbuverror_mmx(MACROBLOCK *mb) | 62 int vp8_mbuverror_mmx(MACROBLOCK *mb) | 
| 63 { | 63 { | 
| 64     short *s_ptr = &mb->coeff[256]; | 64     short *s_ptr = &mb->coeff[256]; | 
| 65     short *d_ptr = &mb->e_mbd.dqcoeff[256]; | 65     short *d_ptr = &mb->e_mbd.dqcoeff[256]; | 
| 66     return vp8_mbuverror_mmx_impl(s_ptr, d_ptr); | 66     return vp8_mbuverror_mmx_impl(s_ptr, d_ptr); | 
| 67 } | 67 } | 
| 68 | 68 | 
| 69 void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride, | 69 void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride, | 
| 70                              short *diff, unsigned char *predictor, | 70                              short *diff, unsigned char *predictor, | 
| 71                              int pitch); | 71                              int pitch); | 
| 72 static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) | 72 void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) | 
| 73 { | 73 { | 
| 74     unsigned char *z = *(be->base_src) + be->src; | 74     unsigned char *z = *(be->base_src) + be->src; | 
| 75     unsigned int  src_stride = be->src_stride; | 75     unsigned int  src_stride = be->src_stride; | 
| 76     short *diff = &be->src_diff[0]; | 76     short *diff = &be->src_diff[0]; | 
| 77     unsigned char *predictor = &bd->predictor[0]; | 77     unsigned char *predictor = &bd->predictor[0]; | 
| 78     vp8_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); | 78     vp8_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); | 
| 79 } | 79 } | 
| 80 | 80 | 
| 81 #endif | 81 #endif | 
| 82 | 82 | 
| 83 #if HAVE_SSE2 | 83 #if HAVE_SSE2 | 
| 84 int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, |  | 
| 85                                  short *qcoeff_ptr, short *dequant_ptr, |  | 
| 86                                  const short *inv_scan_order, short *round_ptr, |  | 
| 87                                  short *quant_ptr, short *dqcoeff_ptr); |  | 
| 88 static void fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) |  | 
| 89 { |  | 
| 90     short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; |  | 
| 91     short *coeff_ptr   = b->coeff; |  | 
| 92     short *round_ptr   = b->round; |  | 
| 93     short *quant_ptr   = b->quant_fast; |  | 
| 94     short *qcoeff_ptr  = d->qcoeff; |  | 
| 95     short *dqcoeff_ptr = d->dqcoeff; |  | 
| 96     short *dequant_ptr = d->dequant; |  | 
| 97 |  | 
| 98     d->eob = vp8_fast_quantize_b_impl_sse2( |  | 
| 99                  coeff_ptr, |  | 
| 100                  qcoeff_ptr, |  | 
| 101                  dequant_ptr, |  | 
| 102                  vp8_default_inv_zig_zag, |  | 
| 103                  round_ptr, |  | 
| 104                  quant_ptr, |  | 
| 105                  dqcoeff_ptr |  | 
| 106              ); |  | 
| 107 } |  | 
| 108 |  | 
| 109 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 84 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 
| 110 static int mbblock_error_xmm(MACROBLOCK *mb, int dc) | 85 int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc) | 
| 111 { | 86 { | 
| 112     short *coeff_ptr =  mb->block[0].coeff; | 87     short *coeff_ptr =  mb->block[0].coeff; | 
| 113     short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff; | 88     short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff; | 
| 114     return vp8_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc); | 89     return vp8_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc); | 
| 115 } | 90 } | 
| 116 | 91 | 
| 117 int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); | 92 int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); | 
| 118 static int mbuverror_xmm(MACROBLOCK *mb) | 93 int vp8_mbuverror_xmm(MACROBLOCK *mb) | 
| 119 { | 94 { | 
| 120     short *s_ptr = &mb->coeff[256]; | 95     short *s_ptr = &mb->coeff[256]; | 
| 121     short *d_ptr = &mb->e_mbd.dqcoeff[256]; | 96     short *d_ptr = &mb->e_mbd.dqcoeff[256]; | 
| 122     return vp8_mbuverror_xmm_impl(s_ptr, d_ptr); | 97     return vp8_mbuverror_xmm_impl(s_ptr, d_ptr); | 
| 123 } | 98 } | 
| 124 | 99 | 
| 125 void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride, | 100 void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride, | 
| 126                              short *diff, unsigned char *predictor, | 101                              short *diff, unsigned char *predictor, | 
| 127                              int pitch); | 102                              int pitch); | 
| 128 static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) | 103 void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) | 
| 129 { | 104 { | 
| 130     unsigned char *z = *(be->base_src) + be->src; | 105     unsigned char *z = *(be->base_src) + be->src; | 
| 131     unsigned int  src_stride = be->src_stride; | 106     unsigned int  src_stride = be->src_stride; | 
| 132     short *diff = &be->src_diff[0]; | 107     short *diff = &be->src_diff[0]; | 
| 133     unsigned char *predictor = &bd->predictor[0]; | 108     unsigned char *predictor = &bd->predictor[0]; | 
| 134     vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); | 109     vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); | 
| 135 } | 110 } | 
| 136 | 111 | 
| 137 #endif | 112 #endif | 
| 138 | 113 | 
| 139 #if HAVE_SSSE3 | 114 #if HAVE_SSSE3 | 
| 140 int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr, | 115 #if CONFIG_INTERNAL_STATS | 
| 141                                  short *qcoeff_ptr, short *dequant_ptr, |  | 
| 142                                  short *round_ptr, |  | 
| 143                                  short *quant_ptr, short *dqcoeff_ptr); |  | 
| 144 static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) |  | 
| 145 { |  | 
| 146     d->eob = vp8_fast_quantize_b_impl_ssse3( |  | 
| 147                     b->coeff, |  | 
| 148                     d->qcoeff, |  | 
| 149                     d->dequant, |  | 
| 150                     b->round, |  | 
| 151                     b->quant_fast, |  | 
| 152                     d->dqcoeff |  | 
| 153                ); |  | 
| 154 } |  | 
| 155 #if CONFIG_PSNR |  | 
| 156 #if ARCH_X86_64 | 116 #if ARCH_X86_64 | 
| 157 typedef void ssimpf | 117 typedef void ssimpf | 
| 158 ( | 118 ( | 
| 159     unsigned char *s, | 119     unsigned char *s, | 
| 160     int sp, | 120     int sp, | 
| 161     unsigned char *r, | 121     unsigned char *r, | 
| 162     int rp, | 122     int rp, | 
| 163     unsigned long *sum_s, | 123     unsigned long *sum_s, | 
| 164     unsigned long *sum_r, | 124     unsigned long *sum_r, | 
| 165     unsigned long *sum_sq_s, | 125     unsigned long *sum_sq_s, | 
| 166     unsigned long *sum_sq_r, | 126     unsigned long *sum_sq_r, | 
| 167     unsigned long *sum_sxr | 127     unsigned long *sum_sxr | 
| 168 ); | 128 ); | 
| 169 | 129 | 
| 170 extern ssimpf vp8_ssim_parms_16x16_sse3; | 130 extern ssimpf vp8_ssim_parms_16x16_sse3; | 
| 171 extern ssimpf vp8_ssim_parms_8x8_sse3; | 131 extern ssimpf vp8_ssim_parms_8x8_sse3; | 
| 172 #endif | 132 #endif | 
| 173 #endif | 133 #endif | 
| 174 #endif | 134 #endif | 
| 175 | 135 | 
| 176 | 136 | 
| 177 void vp8_arch_x86_encoder_init(VP8_COMP *cpi) | 137 void vp8_arch_x86_encoder_init(VP8_COMP *cpi) | 
| 178 { | 138 { | 
| 179 #if CONFIG_RUNTIME_CPU_DETECT | 139 #if CONFIG_RUNTIME_CPU_DETECT | 
| 180     int flags = x86_simd_caps(); | 140     int flags = x86_simd_caps(); | 
| 181     int mmx_enabled = flags & HAS_MMX; |  | 
| 182     int xmm_enabled = flags & HAS_SSE; |  | 
| 183     int wmt_enabled = flags & HAS_SSE2; |  | 
| 184     int SSE3Enabled = flags & HAS_SSE3; |  | 
| 185     int SSSE3Enabled = flags & HAS_SSSE3; |  | 
| 186     int SSE4_1Enabled = flags & HAS_SSE4_1; |  | 
| 187 | 141 | 
| 188     /* Note: | 142     /* Note: | 
| 189      * | 143      * | 
| 190      * This platform can be built without runtime CPU detection as well. If | 144      * This platform can be built without runtime CPU detection as well. If | 
| 191      * you modify any of the function mappings present in this file, be sure | 145      * you modify any of the function mappings present in this file, be sure | 
| 192      * to also update them in static mapings (<arch>/filename_<arch>.h) | 146      * to also update them in static mapings (<arch>/filename_<arch>.h) | 
| 193      */ | 147      */ | 
| 194 | 148 | 
| 195     /* Override default functions with fastest ones for this CPU. */ | 149     /* Override default functions with fastest ones for this CPU. */ | 
| 196 #if HAVE_MMX | 150 #if HAVE_MMX | 
| 197     if (mmx_enabled) | 151     if (flags & HAS_MMX) | 
| 198     { | 152     { | 
| 199         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_mmx; | 153         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_mmx; | 
| 200         cpi->rtcd.variance.sad16x8               = vp8_sad16x8_mmx; | 154         cpi->rtcd.variance.sad16x8               = vp8_sad16x8_mmx; | 
| 201         cpi->rtcd.variance.sad8x16               = vp8_sad8x16_mmx; | 155         cpi->rtcd.variance.sad8x16               = vp8_sad8x16_mmx; | 
| 202         cpi->rtcd.variance.sad8x8                = vp8_sad8x8_mmx; | 156         cpi->rtcd.variance.sad8x8                = vp8_sad8x8_mmx; | 
| 203         cpi->rtcd.variance.sad4x4                = vp8_sad4x4_mmx; | 157         cpi->rtcd.variance.sad4x4                = vp8_sad4x4_mmx; | 
| 204 | 158 | 
| 205         cpi->rtcd.variance.var4x4                = vp8_variance4x4_mmx; | 159         cpi->rtcd.variance.var4x4                = vp8_variance4x4_mmx; | 
| 206         cpi->rtcd.variance.var8x8                = vp8_variance8x8_mmx; | 160         cpi->rtcd.variance.var8x8                = vp8_variance8x8_mmx; | 
| 207         cpi->rtcd.variance.var8x16               = vp8_variance8x16_mmx; | 161         cpi->rtcd.variance.var8x16               = vp8_variance8x16_mmx; | 
| 208         cpi->rtcd.variance.var16x8               = vp8_variance16x8_mmx; | 162         cpi->rtcd.variance.var16x8               = vp8_variance16x8_mmx; | 
| 209         cpi->rtcd.variance.var16x16              = vp8_variance16x16_mmx; | 163         cpi->rtcd.variance.var16x16              = vp8_variance16x16_mmx; | 
| 210 | 164 | 
| 211         cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_mmx; | 165         cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_mmx; | 
| 212         cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_mmx; | 166         cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_mmx; | 
| 213         cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_mmx; | 167         cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_mmx; | 
| 214         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_mmx; | 168         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_mmx; | 
| 215         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_mmx; | 169         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_mmx; | 
| 216         cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_mmx; | 170         cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_mmx; | 
| 217         cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_mmx; | 171         cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_mmx; | 
| 218         cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_mmx; | 172         cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_mmx; | 
| 219         cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_mmx; | 173         cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_mmx; | 
| 220 | 174 | 
| 221         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_mmx; | 175         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_mmx; | 
| 222         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_mmx; | 176         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_mmx; | 
| 223 | 177 | 
| 224         cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_mmx; |  | 
| 225         cpi->rtcd.variance.get8x8var             = vp8_get8x8var_mmx; |  | 
| 226         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_mmx; |  | 
| 227         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx; | 178         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx; | 
| 228 | 179 | 
| 229         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx; | 180         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx; | 
| 230         cpi->rtcd.fdct.short8x4                  = short_fdct8x4_mmx; | 181         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx; | 
| 231         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx; | 182         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx; | 
| 232         cpi->rtcd.fdct.fast8x4                   = short_fdct8x4_mmx; | 183         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx; | 
| 233 | 184 | 
| 234         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c; | 185         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c; | 
| 235 | 186 | 
| 236         cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx; | 187         cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx; | 
| 237         cpi->rtcd.encodemb.mberr                 = mbblock_error_mmx; | 188         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_mmx; | 
| 238         cpi->rtcd.encodemb.mbuverr               = mbuverror_mmx; | 189         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_mmx; | 
| 239         cpi->rtcd.encodemb.subb                  = subtract_b_mmx; | 190         cpi->rtcd.encodemb.subb                  = vp8_subtract_b_mmx; | 
| 240         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx; | 191         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx; | 
| 241         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx; | 192         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx; | 
| 242 | 193 | 
| 243         /*cpi->rtcd.quantize.fastquantb            = fast_quantize_b_mmx;*/ | 194         /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/ | 
| 244     } | 195     } | 
| 245 #endif | 196 #endif | 
| 246 | 197 | 
| 247 #if HAVE_SSE2 | 198 #if HAVE_SSE2 | 
| 248     if (wmt_enabled) | 199     if (flags & HAS_SSE2) | 
| 249     { | 200     { | 
| 250         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_wmt; | 201         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_wmt; | 
| 251         cpi->rtcd.variance.sad16x8               = vp8_sad16x8_wmt; | 202         cpi->rtcd.variance.sad16x8               = vp8_sad16x8_wmt; | 
| 252         cpi->rtcd.variance.sad8x16               = vp8_sad8x16_wmt; | 203         cpi->rtcd.variance.sad8x16               = vp8_sad8x16_wmt; | 
| 253         cpi->rtcd.variance.sad8x8                = vp8_sad8x8_wmt; | 204         cpi->rtcd.variance.sad8x8                = vp8_sad8x8_wmt; | 
| 254         cpi->rtcd.variance.sad4x4                = vp8_sad4x4_wmt; | 205         cpi->rtcd.variance.sad4x4                = vp8_sad4x4_wmt; | 
|  | 206         cpi->rtcd.variance.copy32xn              = vp8_copy32xn_sse2; | 
| 255 | 207 | 
| 256         cpi->rtcd.variance.var4x4                = vp8_variance4x4_wmt; | 208         cpi->rtcd.variance.var4x4                = vp8_variance4x4_wmt; | 
| 257         cpi->rtcd.variance.var8x8                = vp8_variance8x8_wmt; | 209         cpi->rtcd.variance.var8x8                = vp8_variance8x8_wmt; | 
| 258         cpi->rtcd.variance.var8x16               = vp8_variance8x16_wmt; | 210         cpi->rtcd.variance.var8x16               = vp8_variance8x16_wmt; | 
| 259         cpi->rtcd.variance.var16x8               = vp8_variance16x8_wmt; | 211         cpi->rtcd.variance.var16x8               = vp8_variance16x8_wmt; | 
| 260         cpi->rtcd.variance.var16x16              = vp8_variance16x16_wmt; | 212         cpi->rtcd.variance.var16x16              = vp8_variance16x16_wmt; | 
| 261 | 213 | 
| 262         cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_wmt; | 214         cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_wmt; | 
| 263         cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_wmt; | 215         cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_wmt; | 
| 264         cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_wmt; | 216         cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_wmt; | 
| 265         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_wmt; | 217         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_wmt; | 
| 266         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_wmt; | 218         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_wmt; | 
| 267         cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_wmt; | 219         cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_wmt; | 
| 268         cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_wmt; | 220         cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_wmt; | 
| 269         cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_wmt; | 221         cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_wmt; | 
| 270         cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_wmt; | 222         cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_wmt; | 
| 271 | 223 | 
| 272         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_wmt; | 224         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_wmt; | 
| 273         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_sse2; | 225         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_sse2; | 
| 274 | 226 | 
| 275         cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_sse2; |  | 
| 276         cpi->rtcd.variance.get8x8var             = vp8_get8x8var_sse2; |  | 
| 277         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_sse2; |  | 
| 278 |  | 
| 279 |  | 
| 280         /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */; | 227         /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */; | 
| 281 | 228 | 
| 282         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2; | 229         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2; | 
| 283         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_sse2; | 230         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_sse2; | 
| 284         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2; | 231         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2; | 
| 285         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2; | 232         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2; | 
| 286 | 233 | 
| 287         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ; | 234         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ; | 
| 288 | 235 | 
| 289         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm; | 236         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm; | 
| 290         cpi->rtcd.encodemb.mberr                 = mbblock_error_xmm; | 237         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm; | 
| 291         cpi->rtcd.encodemb.mbuverr               = mbuverror_xmm; | 238         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm; | 
| 292         cpi->rtcd.encodemb.subb                  = subtract_b_sse2; | 239         cpi->rtcd.encodemb.subb                  = vp8_subtract_b_sse2; | 
| 293         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_sse2; | 240         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_sse2; | 
| 294         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_sse2; | 241         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_sse2; | 
| 295 | 242 | 
| 296         cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse2; | 243         cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse2; | 
| 297         cpi->rtcd.quantize.fastquantb            = fast_quantize_b_sse2; | 244         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2; | 
| 298 | 245 | 
| 299 #if !(CONFIG_REALTIME_ONLY) | 246 #if !(CONFIG_REALTIME_ONLY) | 
| 300         cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2; | 247         cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2; | 
| 301 #endif | 248 #endif | 
| 302     } | 249     } | 
| 303 #endif | 250 #endif | 
| 304 | 251 | 
| 305 #if HAVE_SSE3 | 252 #if HAVE_SSE3 | 
| 306     if (SSE3Enabled) | 253     if (flags & HAS_SSE3) | 
| 307     { | 254     { | 
| 308         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_sse3; | 255         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_sse3; | 
| 309         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_sse3; | 256         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_sse3; | 
| 310         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_sse3; | 257         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_sse3; | 
| 311         cpi->rtcd.variance.sad8x16x3             = vp8_sad8x16x3_sse3; | 258         cpi->rtcd.variance.sad8x16x3             = vp8_sad8x16x3_sse3; | 
| 312         cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_sse3; | 259         cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_sse3; | 
| 313         cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_sse3; | 260         cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_sse3; | 
| 314 #if !(CONFIG_REALTIME_ONLY) |  | 
| 315         cpi->rtcd.search.full_search             = vp8_full_search_sadx3; | 261         cpi->rtcd.search.full_search             = vp8_full_search_sadx3; | 
| 316 #endif |  | 
| 317         cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_sse3; | 262         cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_sse3; | 
| 318         cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_sse3; | 263         cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_sse3; | 
| 319         cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_sse3; | 264         cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_sse3; | 
| 320         cpi->rtcd.variance.sad8x8x4d             = vp8_sad8x8x4d_sse3; | 265         cpi->rtcd.variance.sad8x8x4d             = vp8_sad8x8x4d_sse3; | 
| 321         cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_sse3; | 266         cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_sse3; | 
|  | 267         cpi->rtcd.variance.copy32xn              = vp8_copy32xn_sse3; | 
| 322         cpi->rtcd.search.diamond_search          = vp8_diamond_search_sadx4; | 268         cpi->rtcd.search.diamond_search          = vp8_diamond_search_sadx4; | 
|  | 269         cpi->rtcd.search.refining_search         = vp8_refining_search_sadx4; | 
| 323     } | 270     } | 
| 324 #endif | 271 #endif | 
| 325 | 272 | 
| 326 #if HAVE_SSSE3 | 273 #if HAVE_SSSE3 | 
| 327     if (SSSE3Enabled) | 274     if (flags & HAS_SSSE3) | 
| 328     { | 275     { | 
| 329         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3; | 276         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3; | 
| 330         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3; | 277         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3; | 
| 331 | 278 | 
| 332         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3; | 279         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3; | 
| 333         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3; | 280         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3; | 
| 334 | 281 | 
| 335         cpi->rtcd.quantize.fastquantb            = fast_quantize_b_ssse3; | 282         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3; | 
| 336 | 283 | 
| 337 #if CONFIG_PSNR | 284 #if CONFIG_INTERNAL_STATS | 
| 338 #if ARCH_X86_64 | 285 #if ARCH_X86_64 | 
| 339         cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_sse3; | 286         cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_sse3; | 
| 340         cpi->rtcd.variance.ssimpf                = vp8_ssim_parms_16x16_sse3; | 287         cpi->rtcd.variance.ssimpf                = vp8_ssim_parms_16x16_sse3; | 
| 341 #endif | 288 #endif | 
| 342 #endif | 289 #endif | 
| 343 | 290 | 
| 344     } | 291     } | 
| 345 #endif | 292 #endif | 
| 346 | 293 | 
| 347 | 294 | 
| 348 | 295 | 
| 349 #if HAVE_SSE4_1 | 296 #if HAVE_SSE4_1 | 
| 350     if (SSE4_1Enabled) | 297     if (flags & HAS_SSE4_1) | 
| 351     { | 298     { | 
| 352         cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_sse4; | 299         cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_sse4; | 
| 353         cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_sse4; | 300         cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_sse4; | 
| 354         cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_sse4; | 301         cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_sse4; | 
| 355         cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_sse4; | 302         cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_sse4; | 
| 356         cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_sse4; | 303         cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_sse4; | 
| 357 #if !(CONFIG_REALTIME_ONLY) |  | 
| 358         cpi->rtcd.search.full_search             = vp8_full_search_sadx8; | 304         cpi->rtcd.search.full_search             = vp8_full_search_sadx8; | 
| 359 #endif | 305 | 
|  | 306         cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse4; | 
| 360     } | 307     } | 
| 361 #endif | 308 #endif | 
| 362 | 309 | 
| 363 #endif | 310 #endif | 
| 364 } | 311 } | 
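
Note on the pattern above: `vp8_arch_x86_encoder_init()` queries the CPU once via `x86_simd_caps()` and then overwrites the default RTCD function pointers with the fastest variant the host supports; this patch simply tests the `flags` bitmask directly instead of caching each bit in a local. Below is a minimal, self-contained sketch of that dispatch style — the capability bit, table layout, and `sad16x16_simd` hook are illustrative assumptions, not the actual libvpx RTCD structures.

```c
#include <stdlib.h>   /* abs() */
#include <stdint.h>

/* Illustrative capability bit, standing in for HAS_MMX / HAS_SSE2 / ... */
#define CAP_SSE2 (1 << 0)

typedef unsigned int (*sad16x16_fn)(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride);

/* Portable C fallback; always correct, used when no SIMD variant applies. */
static unsigned int sad16x16_c(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride)
{
    unsigned int sum = 0;
    int r, c;
    for (r = 0; r < 16; r++)
        for (c = 0; c < 16; c++)
            sum += abs(src[r * src_stride + c] - ref[r * ref_stride + c]);
    return sum;
}

/* One slot per hot function; the real code groups these per module
 * (variance, fdct, encodemb, quantize, search, ...). */
struct rtcd_table {
    sad16x16_fn sad16x16;
};

static void rtcd_init(struct rtcd_table *rtcd, int flags)
{
    rtcd->sad16x16 = sad16x16_c;        /* safe default for every target */
#ifdef HAVE_SAD16X16_SIMD               /* assumed build-time guard */
    if (flags & CAP_SSE2)
        rtcd->sad16x16 = sad16x16_simd; /* assumed optimized variant */
#endif
}
```

Callers then go through the table (`rtcd->sad16x16(...)`), which is why a build without CONFIG_RUNTIME_CPU_DETECT can resolve the same names statically via the per-arch headers mentioned in the comment inside `vp8_arch_x86_encoder_init()`.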