| Index: source/config/win/x64/vp9_rtcd.h | 
| diff --git a/source/config/win/x64/vp9_rtcd.h b/source/config/win/x64/vp9_rtcd.h | 
| index f8a3381eb11af5deb4b4064c8bbe7301de0b4e19..10a6b8401db45851ee7d8b22345368f85a751831 100644 | 
| --- a/source/config/win/x64/vp9_rtcd.h | 
| +++ b/source/config/win/x64/vp9_rtcd.h | 
| @@ -42,6 +42,10 @@ int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, | 
| int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); | 
| RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); | 
|  | 
| +int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); | 
| +int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); | 
| +#define vp9_block_error_fp vp9_block_error_fp_sse2 | 
| + | 
| void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); | 
| void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); | 
| void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); | 
| @@ -171,28 +175,36 @@ void vp9_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t | 
| RTCD_EXTERN void (*vp9_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
|  | 
| void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c | 
| +void vp9_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_sse2 | 
|  | 
| void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c | 
| +void vp9_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_sse2 | 
|  | 
| void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c | 
| +void vp9_dc_128_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_sse | 
|  | 
| void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c | 
| +void vp9_dc_128_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_sse | 
|  | 
| void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c | 
| +void vp9_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_sse2 | 
|  | 
| void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c | 
| +void vp9_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_sse2 | 
|  | 
| void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c | 
| +void vp9_dc_left_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_sse | 
|  | 
| void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c | 
| +void vp9_dc_left_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_sse | 
|  | 
| void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| void vp9_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| @@ -211,16 +223,20 @@ void vp9_dc_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a | 
| #define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_sse | 
|  | 
| void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c | 
| +void vp9_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_sse2 | 
|  | 
| void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c | 
| +void vp9_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_sse2 | 
|  | 
| void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c | 
| +void vp9_dc_top_predictor_4x4_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_sse | 
|  | 
| void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c | 
| +void vp9_dc_top_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_sse | 
|  | 
| int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude); | 
| int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude); | 
| @@ -334,10 +350,18 @@ void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov | 
| void vp9_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
| RTCD_EXTERN void (*vp9_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); | 
|  | 
| +void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff); | 
| +void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); | 
| +#define vp9_hadamard_16x16 vp9_hadamard_16x16_sse2 | 
| + | 
| +void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t *coeff); | 
| +void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, int16_t *coeff); | 
| +void vp9_hadamard_8x8_ssse3(int16_t const *src_diff, int src_stride, int16_t *coeff); | 
| +RTCD_EXTERN void (*vp9_hadamard_8x8)(int16_t const *src_diff, int src_stride, int16_t *coeff); | 
| + | 
| void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| void vp9_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| -void vp9_idct16x16_10_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| -RTCD_EXTERN void (*vp9_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| +#define vp9_idct16x16_10_add vp9_idct16x16_10_add_sse2 | 
|  | 
| void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| @@ -345,8 +369,7 @@ void vp9_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_s | 
|  | 
| void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| void vp9_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| -void vp9_idct16x16_256_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| -RTCD_EXTERN void (*vp9_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| +#define vp9_idct16x16_256_add vp9_idct16x16_256_add_sse2 | 
|  | 
| void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| void vp9_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); | 
| @@ -461,6 +484,10 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli | 
| void vp9_mbpost_proc_down_xmm(uint8_t *dst, int pitch, int rows, int cols, int flimit); | 
| #define vp9_mbpost_proc_down vp9_mbpost_proc_down_xmm | 
|  | 
| +void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); | 
| +void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); | 
| +#define vp9_minmax_8x8 vp9_minmax_8x8_sse2 | 
| + | 
| unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse); | 
| unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse); | 
| unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse); | 
| @@ -504,228 +531,9 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int | 
| void vp9_quantize_fp_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); | 
| RTCD_EXTERN void (*vp9_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); | 
|  | 
| -unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -#define vp9_sad16x16 vp9_sad16x16_sse2 | 
| - | 
| -unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad16x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad16x16_avg vp9_sad16x16_avg_sse2 | 
| - | 
| -void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad16x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad16x16x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -RTCD_EXTERN void (*vp9_sad16x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| - | 
| -void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad16x16x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad16x16x4d vp9_sad16x16x4d_sse2 | 
| - | 
| -void vp9_sad16x16x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad16x16x8 vp9_sad16x16x8_c | 
| - | 
| -unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -#define vp9_sad16x32 vp9_sad16x32_sse2 | 
| - | 
| -unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad16x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad16x32_avg vp9_sad16x32_avg_sse2 | 
| - | 
| -void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad16x32x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad16x32x4d vp9_sad16x32x4d_sse2 | 
| - | 
| -unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -#define vp9_sad16x8 vp9_sad16x8_sse2 | 
| - | 
| -unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad16x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad16x8_avg vp9_sad16x8_avg_sse2 | 
| - | 
| -void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad16x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad16x8x3_ssse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -RTCD_EXTERN void (*vp9_sad16x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| - | 
| -void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad16x8x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad16x8x4d vp9_sad16x8x4d_sse2 | 
| - | 
| -void vp9_sad16x8x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad16x8x8 vp9_sad16x8x8_c | 
| - | 
| -unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -RTCD_EXTERN unsigned int (*vp9_sad32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| - | 
| -unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad32x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad32x16_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -RTCD_EXTERN unsigned int (*vp9_sad32x16_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| - | 
| -void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad32x16x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad32x16x4d vp9_sad32x16x4d_sse2 | 
| - | 
| -unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -RTCD_EXTERN unsigned int (*vp9_sad32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| - | 
| -unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad32x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad32x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -RTCD_EXTERN unsigned int (*vp9_sad32x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| - | 
| -void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad32x32x3 vp9_sad32x32x3_c | 
| - | 
| -void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| - | 
| -void vp9_sad32x32x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad32x32x8 vp9_sad32x32x8_c | 
| - | 
| -unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad32x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -RTCD_EXTERN unsigned int (*vp9_sad32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| - | 
| -unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad32x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad32x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -RTCD_EXTERN unsigned int (*vp9_sad32x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| - | 
| -void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad32x64x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad32x64x4d vp9_sad32x64x4d_sse2 | 
| - | 
| -unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad4x4_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -#define vp9_sad4x4 vp9_sad4x4_sse | 
| - | 
| -unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad4x4_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad4x4_avg vp9_sad4x4_avg_sse | 
| - | 
| -void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad4x4x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -RTCD_EXTERN void (*vp9_sad4x4x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| - | 
| -void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad4x4x4d_sse(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad4x4x4d vp9_sad4x4x4d_sse | 
| - | 
| -void vp9_sad4x4x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad4x4x8 vp9_sad4x4x8_c | 
| - | 
| -unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad4x8_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -#define vp9_sad4x8 vp9_sad4x8_sse | 
| - | 
| -unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad4x8_avg_sse(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad4x8_avg vp9_sad4x8_avg_sse | 
| - | 
| -void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); | 
| -void vp9_sad4x8x4d_sse(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad4x8x4d vp9_sad4x8x4d_sse | 
| - | 
| -void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad4x8x8 vp9_sad4x8x8_c | 
| - | 
| -unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -RTCD_EXTERN unsigned int (*vp9_sad64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| - | 
| -unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad64x32_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad64x32_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -RTCD_EXTERN unsigned int (*vp9_sad64x32_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| - | 
| -void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad64x32x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad64x32x4d vp9_sad64x32x4d_sse2 | 
| - | 
| -unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -RTCD_EXTERN unsigned int (*vp9_sad64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| - | 
| -unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad64x64_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad64x64_avg_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -RTCD_EXTERN unsigned int (*vp9_sad64x64_avg)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| - | 
| -void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad64x64x3 vp9_sad64x64x3_c | 
| - | 
| -void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| - | 
| -void vp9_sad64x64x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad64x64x8 vp9_sad64x64x8_c | 
| - | 
| -unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -#define vp9_sad8x16 vp9_sad8x16_sse2 | 
| - | 
| -unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad8x16_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad8x16_avg vp9_sad8x16_avg_sse2 | 
| - | 
| -void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad8x16x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -RTCD_EXTERN void (*vp9_sad8x16x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| - | 
| -void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad8x16x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad8x16x4d vp9_sad8x16x4d_sse2 | 
| - | 
| -void vp9_sad8x16x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad8x16x8 vp9_sad8x16x8_c | 
| - | 
| -unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -unsigned int vp9_sad8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride); | 
| -#define vp9_sad8x4 vp9_sad8x4_sse2 | 
| - | 
| -unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad8x4_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad8x4_avg vp9_sad8x4_avg_sse2 | 
| - | 
| -void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); | 
| -void vp9_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad8x4x4d vp9_sad8x4x4d_sse2 | 
| - | 
| -void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad8x4x8 vp9_sad8x4x8_c | 
| - | 
| -unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -unsigned int vp9_sad8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride); | 
| -#define vp9_sad8x8 vp9_sad8x8_sse2 | 
| - | 
| -unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -unsigned int vp9_sad8x8_avg_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred); | 
| -#define vp9_sad8x8_avg vp9_sad8x8_avg_sse2 | 
| - | 
| -void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad8x8x3_sse3(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| -RTCD_EXTERN void (*vp9_sad8x8x3)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array); | 
| - | 
| -void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -void vp9_sad8x8x4d_sse2(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array); | 
| -#define vp9_sad8x8x4d vp9_sad8x8x4d_sse2 | 
| - | 
| -void vp9_sad8x8x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array); | 
| -#define vp9_sad8x8x8 vp9_sad8x8x8_c | 
| +int16_t vp9_satd_c(const int16_t *coeff, int length); | 
| +int16_t vp9_satd_sse2(const int16_t *coeff, int length); | 
| +#define vp9_satd vp9_satd_sse2 | 
|  | 
| unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); | 
| unsigned int vp9_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); | 
| @@ -1040,10 +848,8 @@ static void setup_rtcd_internal(void) | 
| if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; | 
| vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; | 
| if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; | 
| -    vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; | 
| -    if (flags & HAS_SSSE3) vp9_idct16x16_10_add = vp9_idct16x16_10_add_ssse3; | 
| -    vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; | 
| -    if (flags & HAS_SSSE3) vp9_idct16x16_256_add = vp9_idct16x16_256_add_ssse3; | 
| +    vp9_hadamard_8x8 = vp9_hadamard_8x8_sse2; | 
| +    if (flags & HAS_SSSE3) vp9_hadamard_8x8 = vp9_hadamard_8x8_ssse3; | 
| vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2; | 
| if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3; | 
| vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; | 
| @@ -1060,42 +866,6 @@ static void setup_rtcd_internal(void) | 
| if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3; | 
| vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c; | 
| if (flags & HAS_SSSE3) vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3; | 
| -    vp9_sad16x16x3 = vp9_sad16x16x3_c; | 
| -    if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; | 
| -    if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; | 
| -    vp9_sad16x8x3 = vp9_sad16x8x3_c; | 
| -    if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; | 
| -    if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; | 
| -    vp9_sad32x16 = vp9_sad32x16_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad32x16 = vp9_sad32x16_avx2; | 
| -    vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad32x16_avg = vp9_sad32x16_avg_avx2; | 
| -    vp9_sad32x32 = vp9_sad32x32_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad32x32 = vp9_sad32x32_avx2; | 
| -    vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad32x32_avg = vp9_sad32x32_avg_avx2; | 
| -    vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; | 
| -    vp9_sad32x64 = vp9_sad32x64_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad32x64 = vp9_sad32x64_avx2; | 
| -    vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad32x64_avg = vp9_sad32x64_avg_avx2; | 
| -    vp9_sad4x4x3 = vp9_sad4x4x3_c; | 
| -    if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; | 
| -    vp9_sad64x32 = vp9_sad64x32_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad64x32 = vp9_sad64x32_avx2; | 
| -    vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad64x32_avg = vp9_sad64x32_avg_avx2; | 
| -    vp9_sad64x64 = vp9_sad64x64_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad64x64 = vp9_sad64x64_avx2; | 
| -    vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad64x64_avg = vp9_sad64x64_avg_avx2; | 
| -    vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; | 
| -    if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2; | 
| -    vp9_sad8x16x3 = vp9_sad8x16x3_c; | 
| -    if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; | 
| -    vp9_sad8x8x3 = vp9_sad8x8x3_c; | 
| -    if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; | 
| vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; | 
| if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; | 
| vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; | 
|  |