| Index: source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
|
| diff --git a/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
|
| index 128dd8ba8ad46a7655e9614391dc0f878d0e470a..2901cd616680db26c3df4f2e3879fbe48ba0a359 100644
|
| --- a/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
|
| +++ b/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
|
| @@ -11,6 +11,12 @@ EOF
|
| }
|
| forward_decls qw/vpx_dsp_forward_decls/;
|
|
|
| +
|
| +$avx2 = '';
|
| +if (vpx_config("HAVE_AVX2") eq "yes") {
|
| + $avx2 = 'avx2';
|
| +}
|
| +
|
| # x86inc.asm had specific constraints. break it out so it's easy to disable.
|
| # zero all the variables to avoid tricky else conditions.
|
| $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
|
| @@ -23,14 +29,14 @@ if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
|
| $sse2_x86inc = 'sse2';
|
| $ssse3_x86inc = 'ssse3';
|
| $avx_x86inc = 'avx';
|
| - $avx2_x86inc = 'avx2';
|
| + $avx2_x86inc = "$avx2";
|
| if ($opts{arch} eq "x86_64") {
|
| $mmx_x86_64_x86inc = 'mmx';
|
| $sse_x86_64_x86inc = 'sse';
|
| $sse2_x86_64_x86inc = 'sse2';
|
| $ssse3_x86_64_x86inc = 'ssse3';
|
| $avx_x86_64_x86inc = 'avx';
|
| - $avx2_x86_64_x86inc = 'avx2';
|
| + $avx2_x86_64_x86inc = "$avx2";
|
| }
|
| }
|
|
|
| @@ -47,7 +53,7 @@ if ($opts{arch} eq "x86_64") {
|
| $sse2_x86_64 = 'sse2';
|
| $ssse3_x86_64 = 'ssse3';
|
| $avx_x86_64 = 'avx';
|
| - $avx2_x86_64 = 'avx2';
|
| + $avx2_x86_64 = "$avx2";
|
| }
|
|
|
| #
|
| @@ -470,7 +476,7 @@ add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_
|
| specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
|
|
|
| add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
| -specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
|
| +specialize qw/vpx_lpf_horizontal_16 sse2 neon_asm dspr2 msa/, "$avx2";
|
| $vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
|
|
|
| add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
| @@ -600,10 +606,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
| specialize qw/vpx_fdct16x16_1 sse2 msa/;
|
|
|
| add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
|
| - specialize qw/vpx_fdct32x32 sse2 avx2 msa/;
|
| + specialize qw/vpx_fdct32x32 sse2 msa/, "$avx2";
|
|
|
| add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
| - specialize qw/vpx_fdct32x32_rd sse2 avx2 msa/;
|
| + specialize qw/vpx_fdct32x32_rd sse2 msa/, "$avx2";
|
|
|
| add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
|
| specialize qw/vpx_fdct32x32_1 sse2 msa/;
|
| @@ -833,19 +839,19 @@ specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc";
|
| # Single block SAD
|
| #
|
| add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
|
| -specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad64x64 neon msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
|
| -specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad64x32 msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
|
| -specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad32x64 msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
|
| -specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad32x32 neon msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
|
| -specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad32x16 msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
|
| specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
|
| @@ -875,19 +881,19 @@ specialize qw/vpx_sad4x4 mmx neon msa/, "$sse_x86inc";
|
| # Avg
|
| #
|
| add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
|
| -specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad64x64_avg msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
|
| -specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad64x32_avg msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
|
| -specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad32x64_avg msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
|
| -specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad32x32_avg msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
|
| -specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad32x16_avg msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
|
| specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
|
| @@ -970,7 +976,7 @@ specialize qw/vpx_sad4x4x8 sse4_1 msa/;
|
| # Multi-block SAD, comparing a reference to N independent blocks
|
| #
|
| add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
|
| -specialize qw/vpx_sad64x64x4d avx2 neon msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad64x64x4d neon msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
|
| specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc";
|
| @@ -979,7 +985,7 @@ add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, con
|
| specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc";
|
|
|
| add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
|
| -specialize qw/vpx_sad32x32x4d avx2 neon msa/, "$sse2_x86inc";
|
| +specialize qw/vpx_sad32x32x4d neon msa/, "$sse2_x86inc", "$avx2";
|
|
|
| add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
|
| specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc";
|
| @@ -1221,25 +1227,25 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "
|
| # Variance
|
| #
|
| add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| - specialize qw/vpx_variance64x64 sse2 avx2 neon msa/;
|
| + specialize qw/vpx_variance64x64 sse2 neon msa/, "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| - specialize qw/vpx_variance64x32 sse2 avx2 neon msa/;
|
| + specialize qw/vpx_variance64x32 sse2 neon msa/, "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| specialize qw/vpx_variance32x64 sse2 neon msa/;
|
|
|
| add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| - specialize qw/vpx_variance32x32 sse2 avx2 neon msa/;
|
| + specialize qw/vpx_variance32x32 sse2 neon msa/, "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| - specialize qw/vpx_variance32x16 sse2 avx2 msa/;
|
| + specialize qw/vpx_variance32x16 sse2 msa/, "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| specialize qw/vpx_variance16x32 sse2 msa/;
|
|
|
| add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| - specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/;
|
| + specialize qw/vpx_variance16x16 mmx sse2 media neon msa/, "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
| specialize qw/vpx_variance16x8 mmx sse2 neon msa/;
|
| @@ -1263,13 +1269,13 @@ add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_
|
| # Specialty Variance
|
| #
|
| add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
|
| - specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
|
| + specialize qw/vpx_get16x16var sse2 neon msa/, "$avx2";
|
|
|
| add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
|
| specialize qw/vpx_get8x8var mmx sse2 neon msa/;
|
|
|
| add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
|
| - specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon msa/;
|
| + specialize qw/vpx_mse16x16 mmx sse2 media neon msa/, "$avx2";
|
|
|
| add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
|
| specialize qw/vpx_mse16x8 sse2 msa/;
|
| @@ -1292,7 +1298,7 @@ add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred,
|
| # Subpixel Variance
|
| #
|
| add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
|
| - specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
| + specialize qw/vpx_sub_pixel_variance64x64 neon msa/, "$sse2_x86inc", "$ssse3_x86inc", "$avx2";
|
|
|
| add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
|
| specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
| @@ -1301,7 +1307,7 @@ add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int
|
| specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
|
|
| add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
|
| - specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
| + specialize qw/vpx_sub_pixel_variance32x32 neon msa/, "$sse2_x86inc", "$ssse3_x86inc", "$avx2";
|
|
|
| add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
|
| specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
| @@ -1331,7 +1337,7 @@ add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int s
|
| specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc";
|
|
|
| add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
|
| - specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
| + specialize qw/vpx_sub_pixel_avg_variance64x64 msa/, "$sse2_x86inc", "$ssse3_x86inc", "$avx2";
|
|
|
| add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
|
| specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
| @@ -1340,7 +1346,7 @@ add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr,
|
| specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
|
|
| add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
|
| - specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
| + specialize qw/vpx_sub_pixel_avg_variance32x32 msa/, "$sse2_x86inc", "$ssse3_x86inc", "$avx2";
|
|
|
| add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
|
| specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
|
|
|