| Index: source/libvpx/vp9/common/vp9_rtcd_defs.pl
 | 
| ===================================================================
 | 
| --- source/libvpx/vp9/common/vp9_rtcd_defs.pl	(revision 271012)
 | 
| +++ source/libvpx/vp9/common/vp9_rtcd_defs.pl	(working copy)
 | 
| @@ -58,7 +58,8 @@
 | 
|  specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
 | 
| +specialize qw/vp9_h_predictor_4x4 neon_asm dspr2/, "$ssse3_x86inc";
 | 
| +$vp9_h_predictor_4x4_neon_asm=vp9_h_predictor_4x4_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_d117_predictor_4x4/;
 | 
| @@ -70,10 +71,12 @@
 | 
|  specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
 | 
| +specialize qw/vp9_v_predictor_4x4 neon_asm/, "$sse_x86inc";
 | 
| +$vp9_v_predictor_4x4_neon_asm=vp9_v_predictor_4x4_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
 | 
| +specialize qw/vp9_tm_predictor_4x4 neon_asm dspr2/, "$sse_x86inc";
 | 
| +$vp9_tm_predictor_4x4_neon_asm=vp9_tm_predictor_4x4_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
 | 
| @@ -97,7 +100,8 @@
 | 
|  specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
 | 
| +specialize qw/vp9_h_predictor_8x8 neon_asm dspr2/, "$ssse3_x86inc";
 | 
| +$vp9_h_predictor_8x8_neon_asm=vp9_h_predictor_8x8_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_d117_predictor_8x8/;
 | 
| @@ -109,10 +113,12 @@
 | 
|  specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
 | 
| +specialize qw/vp9_v_predictor_8x8 neon_asm/, "$sse_x86inc";
 | 
| +$vp9_v_predictor_8x8_neon_asm=vp9_v_predictor_8x8_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
 | 
| +specialize qw/vp9_tm_predictor_8x8 neon_asm dspr2/, "$sse2_x86inc";
 | 
| +$vp9_tm_predictor_8x8_neon_asm=vp9_tm_predictor_8x8_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
 | 
| @@ -136,7 +142,8 @@
 | 
|  specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
 | 
| +specialize qw/vp9_h_predictor_16x16 neon_asm dspr2/, "$ssse3_x86inc";
 | 
| +$vp9_h_predictor_16x16_neon_asm=vp9_h_predictor_16x16_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_d117_predictor_16x16/;
 | 
| @@ -148,10 +155,12 @@
 | 
|  specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
 | 
| +specialize qw/vp9_v_predictor_16x16 neon_asm/, "$sse2_x86inc";
 | 
| +$vp9_v_predictor_16x16_neon_asm=vp9_v_predictor_16x16_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
 | 
| +specialize qw/vp9_tm_predictor_16x16 neon_asm/, "$sse2_x86inc";
 | 
| +$vp9_tm_predictor_16x16_neon_asm=vp9_tm_predictor_16x16_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
 | 
| @@ -175,7 +184,8 @@
 | 
|  specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
 | 
| +specialize qw/vp9_h_predictor_32x32 neon_asm/, "$ssse3_x86inc";
 | 
| +$vp9_h_predictor_32x32_neon_asm=vp9_h_predictor_32x32_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_d117_predictor_32x32/;
 | 
| @@ -187,10 +197,12 @@
 | 
|  specialize qw/vp9_d153_predictor_32x32/;
 | 
|  
 | 
|  add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
 | 
| +specialize qw/vp9_v_predictor_32x32 neon_asm/, "$sse2_x86inc";
 | 
| +$vp9_v_predictor_32x32_neon_asm=vp9_v_predictor_32x32_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
| -specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
 | 
| +specialize qw/vp9_tm_predictor_32x32 neon_asm/, "$sse2_x86_64";
 | 
| +$vp9_tm_predictor_32x32_neon_asm=vp9_tm_predictor_32x32_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
|  specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
 | 
| @@ -208,37 +220,48 @@
 | 
|  # Loopfilter
 | 
|  #
 | 
|  add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
 | 
| -specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
 | 
| -specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
| -specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
| -specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
| -specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
 | 
| +specialize qw/vp9_lpf_vertical_4 mmx neon_asm dspr2/;
 | 
| +$vp9_lpf_vertical_4_neon_asm=vp9_lpf_vertical_4_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
| -specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_vertical_4_dual sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_vertical_4_dual_neon_asm=vp9_lpf_vertical_4_dual_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
| -specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/;
 | 
| +$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
| -specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
| -specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
| -specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
 | 
| +specialize qw/vp9_lpf_horizontal_4 mmx neon_asm dspr2/;
 | 
| +$vp9_lpf_horizontal_4_neon_asm=vp9_lpf_horizontal_4_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
| -specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
 | 
| +specialize qw/vp9_lpf_horizontal_4_dual sse2 neon_asm dspr2/;
 | 
| +$vp9_lpf_horizontal_4_dual_neon_asm=vp9_lpf_horizontal_4_dual_neon;
 | 
|  
 | 
|  #
 | 
|  # post proc
 | 
| @@ -274,71 +297,91 @@
 | 
|  # Sub Pixel Filters
 | 
|  #
 | 
|  add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";
 | 
| +specialize qw/vp9_convolve_copy neon_asm dspr2/, "$sse2_x86inc";
 | 
| +$vp9_convolve_copy_neon_asm=vp9_convolve_copy_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
 | 
| +specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
 | 
| +$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;
 | 
| +specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
 | 
| +$vp9_convolve8_neon_asm=vp9_convolve8_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;
 | 
| +specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
 | 
| +$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;
 | 
| +specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
 | 
| +$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
 | 
| +specialize qw/vp9_convolve8_avg sse2 ssse3 neon_asm dspr2/;
 | 
| +$vp9_convolve8_avg_neon_asm=vp9_convolve8_avg_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
 | 
| +specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon_asm dspr2/;
 | 
| +$vp9_convolve8_avg_horiz_neon_asm=vp9_convolve8_avg_horiz_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 | 
| -specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
 | 
| +specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon_asm dspr2/;
 | 
| +$vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
 | 
|  
 | 
|  #
 | 
|  # dct
 | 
|  #
 | 
|  add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/, "$ssse3_x86_64";
 | 
| +specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
 | 
| +$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
 | 
|  
 | 
| -add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
 | 
| +add_proto qw/void vp9_idct8x8_12_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| +specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
 | 
| +$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct16x16_256_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct16x16_10_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
 | 
| -$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon;
 | 
| +specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
 | 
| -specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
 | 
| +$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
 | 
| -specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
 | 
| +$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
 | 
| -specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
 | 
| +specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
 | 
| +$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
 | 
|  
 | 
|  add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
 | 
|  specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
 | 
| @@ -660,7 +703,7 @@
 | 
|  # ENCODEMB INVOKE
 | 
|  
 | 
|  add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
 | 
| -specialize qw/vp9_block_error/, "$sse2_x86inc";
 | 
| +specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
 | 
|  specialize qw/vp9_subtract_block/, "$sse2_x86inc";
 | 
| @@ -693,7 +736,7 @@
 | 
|  specialize qw/vp9_fht16x16 sse2 avx2/;
 | 
|  
 | 
|  add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
 | 
| -specialize qw/vp9_fwht4x4/;
 | 
| +specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
 | 
|  
 | 
|  add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
 | 
|  specialize qw/vp9_fdct4x4 sse2 avx2/;
 | 
| 
 |