Index: source/config/mac/x64/vp9_rtcd.h |
=================================================================== |
--- source/config/mac/x64/vp9_rtcd.h (revision 263116) |
+++ source/config/mac/x64/vp9_rtcd.h (working copy) |
@@ -45,6 +45,7 @@ |
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
+void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
RTCD_EXTERN void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
@@ -65,11 +66,13 @@ |
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
RTCD_EXTERN void (*vp9_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
+void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
RTCD_EXTERN void (*vp9_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); |
@@ -225,35 +228,43 @@ |
void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride); |
-#define vp9_fdct16x16 vp9_fdct16x16_sse2 |
+void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride); |
+RTCD_EXTERN void (*vp9_fdct16x16)(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct32x32_c(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct32x32_sse2(const int16_t *input, int16_t *output, int stride); |
-#define vp9_fdct32x32 vp9_fdct32x32_sse2 |
+void vp9_fdct32x32_avx2(const int16_t *input, int16_t *output, int stride); |
+RTCD_EXTERN void (*vp9_fdct32x32)(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct32x32_rd_sse2(const int16_t *input, int16_t *output, int stride); |
-#define vp9_fdct32x32_rd vp9_fdct32x32_rd_sse2 |
+void vp9_fdct32x32_rd_avx2(const int16_t *input, int16_t *output, int stride); |
+RTCD_EXTERN void (*vp9_fdct32x32_rd)(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride); |
-#define vp9_fdct4x4 vp9_fdct4x4_sse2 |
+void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride); |
+RTCD_EXTERN void (*vp9_fdct4x4)(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct8x8_c(const int16_t *input, int16_t *output, int stride); |
void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride); |
-#define vp9_fdct8x8 vp9_fdct8x8_sse2 |
+void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride); |
+RTCD_EXTERN void (*vp9_fdct8x8)(const int16_t *input, int16_t *output, int stride); |
void vp9_fht16x16_c(const int16_t *input, int16_t *output, int stride, int tx_type); |
void vp9_fht16x16_sse2(const int16_t *input, int16_t *output, int stride, int tx_type); |
-#define vp9_fht16x16 vp9_fht16x16_sse2 |
+void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, int stride, int tx_type); |
+RTCD_EXTERN void (*vp9_fht16x16)(const int16_t *input, int16_t *output, int stride, int tx_type); |
void vp9_fht4x4_c(const int16_t *input, int16_t *output, int stride, int tx_type); |
void vp9_fht4x4_sse2(const int16_t *input, int16_t *output, int stride, int tx_type); |
-#define vp9_fht4x4 vp9_fht4x4_sse2 |
+void vp9_fht4x4_avx2(const int16_t *input, int16_t *output, int stride, int tx_type); |
+RTCD_EXTERN void (*vp9_fht4x4)(const int16_t *input, int16_t *output, int stride, int tx_type); |
void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type); |
void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, int stride, int tx_type); |
-#define vp9_fht8x8 vp9_fht8x8_sse2 |
+void vp9_fht8x8_avx2(const int16_t *input, int16_t *output, int stride, int tx_type); |
+RTCD_EXTERN void (*vp9_fht8x8)(const int16_t *input, int16_t *output, int stride, int tx_type); |
int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv); |
#define vp9_full_range_search vp9_full_range_search_c |
@@ -355,7 +366,8 @@ |
void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); |
void vp9_lpf_horizontal_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); |
-#define vp9_lpf_horizontal_16 vp9_lpf_horizontal_16_sse2 |
+void vp9_lpf_horizontal_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); |
+RTCD_EXTERN void (*vp9_lpf_horizontal_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); |
void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); |
void vp9_lpf_horizontal_4_mmx(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count); |
@@ -400,7 +412,8 @@ |
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); |
unsigned int vp9_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); |
unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); |
-#define vp9_mse16x16 vp9_mse16x16_sse2 |
+unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); |
+RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); |
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); |
#define vp9_mse16x8 vp9_mse16x8_c |
@@ -502,7 +515,8 @@ |
void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
void vp9_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
-#define vp9_sad32x32x4d vp9_sad32x32x4d_sse2 |
+void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
+RTCD_EXTERN void (*vp9_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); |
#define vp9_sad32x32x8 vp9_sad32x32x8_c |
@@ -579,7 +593,8 @@ |
void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
void vp9_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
-#define vp9_sad64x64x4d vp9_sad64x64x4d_sse2 |
+void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
+RTCD_EXTERN void (*vp9_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array); |
void vp9_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); |
#define vp9_sad64x64x8 vp9_sad64x64x8_c |
@@ -662,6 +677,7 @@ |
unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
unsigned int vp9_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
unsigned int vp9_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
+unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
@@ -687,6 +703,7 @@ |
unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
unsigned int vp9_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
unsigned int vp9_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
+unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); |
@@ -733,6 +750,7 @@ |
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
+unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
@@ -758,6 +776,7 @@ |
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
+unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
RTCD_EXTERN unsigned int (*vp9_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
@@ -818,7 +837,8 @@ |
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
-#define vp9_variance16x16 vp9_variance16x16_sse2 |
+unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
+RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
@@ -831,11 +851,13 @@ |
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
-#define vp9_variance32x16 vp9_variance32x16_sse2 |
+unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
+RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
-#define vp9_variance32x32 vp9_variance32x32_sse2 |
+unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
+RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
@@ -852,11 +874,13 @@ |
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
-#define vp9_variance64x32 vp9_variance64x32_sse2 |
+unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
+RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
-#define vp9_variance64x64 vp9_variance64x64_sse2 |
+unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
+RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
unsigned int vp9_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); |
@@ -914,6 +938,7 @@ |
vp9_convolve8 = vp9_convolve8_sse2; |
if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; |
+ if (flags & HAS_AVX2) vp9_convolve8 = vp9_convolve8_avx2; |
vp9_convolve8_avg = vp9_convolve8_avg_sse2; |
if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; |
vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; |
@@ -922,8 +947,10 @@ |
if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; |
vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; |
if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; |
+ if (flags & HAS_AVX2) vp9_convolve8_horiz = vp9_convolve8_horiz_avx2; |
vp9_convolve8_vert = vp9_convolve8_vert_sse2; |
if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; |
+ if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; |
vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; |
if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; |
vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; |
@@ -956,6 +983,22 @@ |
if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; |
vp9_diamond_search_sad = vp9_diamond_search_sad_c; |
if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; |
+ vp9_fdct16x16 = vp9_fdct16x16_sse2; |
+ if (flags & HAS_AVX2) vp9_fdct16x16 = vp9_fdct16x16_avx2; |
+ vp9_fdct32x32 = vp9_fdct32x32_sse2; |
+ if (flags & HAS_AVX2) vp9_fdct32x32 = vp9_fdct32x32_avx2; |
+ vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; |
+ if (flags & HAS_AVX2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_avx2; |
+ vp9_fdct4x4 = vp9_fdct4x4_sse2; |
+ if (flags & HAS_AVX2) vp9_fdct4x4 = vp9_fdct4x4_avx2; |
+ vp9_fdct8x8 = vp9_fdct8x8_sse2; |
+ if (flags & HAS_AVX2) vp9_fdct8x8 = vp9_fdct8x8_avx2; |
+ vp9_fht16x16 = vp9_fht16x16_sse2; |
+ if (flags & HAS_AVX2) vp9_fht16x16 = vp9_fht16x16_avx2; |
+ vp9_fht4x4 = vp9_fht4x4_sse2; |
+ if (flags & HAS_AVX2) vp9_fht4x4 = vp9_fht4x4_avx2; |
+ vp9_fht8x8 = vp9_fht8x8_sse2; |
+ if (flags & HAS_AVX2) vp9_fht8x8 = vp9_fht8x8_avx2; |
vp9_full_search_sad = vp9_full_search_sad_c; |
if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; |
if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; |
@@ -967,6 +1010,10 @@ |
if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; |
vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; |
if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; |
+ vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; |
+ if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; |
+ vp9_mse16x16 = vp9_mse16x16_sse2; |
+ if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; |
vp9_quantize_b = vp9_quantize_b_c; |
if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; |
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; |
@@ -979,8 +1026,12 @@ |
vp9_sad16x8x3 = vp9_sad16x8x3_c; |
if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; |
if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; |
+ vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; |
+ if (flags & HAS_AVX2) vp9_sad32x32x4d = vp9_sad32x32x4d_avx2; |
vp9_sad4x4x3 = vp9_sad4x4x3_c; |
if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; |
+ vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; |
+ if (flags & HAS_AVX2) vp9_sad64x64x4d = vp9_sad64x64x4d_avx2; |
vp9_sad8x16x3 = vp9_sad8x16x3_c; |
if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; |
vp9_sad8x8x3 = vp9_sad8x8x3_c; |
@@ -995,6 +1046,7 @@ |
if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; |
vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; |
+ if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_avx2; |
vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; |
vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; |
@@ -1005,6 +1057,7 @@ |
if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; |
vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; |
+ if (flags & HAS_AVX2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_avx2; |
vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; |
vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; |
@@ -1021,6 +1074,7 @@ |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; |
vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; |
+ if (flags & HAS_AVX2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_avx2; |
vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; |
vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; |
@@ -1031,12 +1085,23 @@ |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; |
vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; |
+ if (flags & HAS_AVX2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_avx2; |
vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; |
vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; |
vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; |
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; |
+ vp9_variance16x16 = vp9_variance16x16_sse2; |
+ if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; |
+ vp9_variance32x16 = vp9_variance32x16_sse2; |
+ if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; |
+ vp9_variance32x32 = vp9_variance32x32_sse2; |
+ if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; |
+ vp9_variance64x32 = vp9_variance64x32_sse2; |
+ if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; |
+ vp9_variance64x64 = vp9_variance64x64_sse2; |
+ if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; |
} |
#endif |