| Index: source/libvpx/vp9/common/x86/vp9_asm_stubs.c
|
| ===================================================================
|
| --- source/libvpx/vp9/common/x86/vp9_asm_stubs.c (revision 223100)
|
| +++ source/libvpx/vp9/common/x86/vp9_asm_stubs.c (working copy)
|
| @@ -13,6 +13,7 @@
|
| #include "./vpx_config.h"
|
| #include "./vp9_rtcd.h"
|
| #include "vpx_ports/mem.h"
|
| +
|
| ///////////////////////////////////////////////////////////////////////////
|
| // the mmx function that does the bilinear filtering and var calculation //
|
| // int one pass //
|
| @@ -36,6 +37,8 @@
|
| { 8, 8, 8, 8, 120, 120, 120, 120 }
|
| };
|
|
|
| +
|
| +int num_func_entry = 0;  /* NOTE(review): new file-scope global with external linkage, defined outside the HAVE_SSSE3 guard and not referenced anywhere else in this diff; looks like leftover debug/profiling instrumentation -- confirm whether another file uses it, otherwise drop it or make it static. */
|
| #if HAVE_SSSE3
|
| void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr,
|
| const unsigned int src_pitch,
|
| @@ -44,6 +47,13 @@
|
| unsigned int output_height,
|
| const short *filter);
|
|
|
| +void vp9_filter_block1d16_v8_intrin_ssse3(const unsigned char *src_ptr,
|
| + const unsigned int src_pitch,
|
| + unsigned char *output_ptr,
|
| + unsigned int out_pitch,
|
| + unsigned int output_height,
|
| + const short *filter);  /* Declaration only. This patch calls it in place of vp9_filter_block1d16_v8_ssse3 in the w >= 16 loop of the y_step_q4 == 16 path, with unchanged arguments. NOTE(review): presumably the intrinsics-based implementation; its definition is not visible in this diff -- confirm it is built whenever HAVE_SSSE3 is set. */
|
| +
|
| void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr,
|
| const unsigned int src_pitch,
|
| unsigned char *output_ptr,
|
| @@ -51,6 +61,13 @@
|
| unsigned int output_height,
|
| const short *filter);
|
|
|
| +void vp9_filter_block1d16_h8_intrin_ssse3(const unsigned char *src_ptr,
|
| + const unsigned int src_pitch,
|
| + unsigned char *output_ptr,
|
| + unsigned int out_pitch,
|
| + unsigned int output_height,
|
| + const short *filter);  /* Declaration only. This patch calls it in place of vp9_filter_block1d16_h8_ssse3 in the w >= 16 loop of the x_step_q4 == 16 path, with unchanged arguments. NOTE(review): presumably the intrinsics-based implementation; its definition is not visible in this diff -- confirm it is built whenever HAVE_SSSE3 is set. */
|
| +
|
| void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
|
| const unsigned int src_pitch,
|
| unsigned char *output_ptr,
|
| @@ -58,6 +75,13 @@
|
| unsigned int output_height,
|
| const short *filter);
|
|
|
| +void vp9_filter_block1d8_v8_intrin_ssse3(const unsigned char *src_ptr,
|
| + const unsigned int src_pitch,
|
| + unsigned char *output_ptr,
|
| + unsigned int out_pitch,
|
| + unsigned int output_height,
|
| + const short *filter);  /* Declaration only. This patch calls it in place of vp9_filter_block1d8_v8_ssse3 in the w >= 8 loop of the y_step_q4 == 16 path, with unchanged arguments. NOTE(review): presumably the intrinsics-based implementation; its definition is not visible in this diff -- confirm it is built whenever HAVE_SSSE3 is set. */
|
| +
|
| void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
|
| const unsigned int src_pitch,
|
| unsigned char *output_ptr,
|
| @@ -65,6 +89,13 @@
|
| unsigned int output_height,
|
| const short *filter);
|
|
|
| +void vp9_filter_block1d8_h8_intrin_ssse3(const unsigned char *src_ptr,
|
| + const unsigned int src_pitch,
|
| + unsigned char *output_ptr,
|
| + unsigned int out_pitch,
|
| + unsigned int output_height,
|
| + const short *filter);  /* Declaration only. This patch calls it in place of vp9_filter_block1d8_h8_ssse3 in the w >= 8 loop of the x_step_q4 == 16 path, with unchanged arguments. NOTE(review): presumably the intrinsics-based implementation; its definition is not visible in this diff -- confirm it is built whenever HAVE_SSSE3 is set. */
|
| +
|
| void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,
|
| const unsigned int src_pitch,
|
| unsigned char *output_ptr,
|
| @@ -72,6 +103,14 @@
|
| unsigned int output_height,
|
| const short *filter);
|
|
|
| +void vp9_filter_block1d4_v8_intrin_ssse3(const unsigned char *src_ptr,
|
| + const unsigned int src_pitch,
|
| + unsigned char *output_ptr,
|
| + unsigned int out_pitch,
|
| + unsigned int output_height,
|
| + const short *filter);  /* Declaration only. This patch calls it in place of vp9_filter_block1d4_v8_ssse3 in the w >= 4 loop of the y_step_q4 == 16 path, with unchanged arguments. NOTE(review): presumably the intrinsics-based implementation; its definition is not visible in this diff -- confirm it is built whenever HAVE_SSSE3 is set. */
|
| +
|
| +
|
| void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,
|
| const unsigned int src_pitch,
|
| unsigned char *output_ptr,
|
| @@ -79,6 +118,13 @@
|
| unsigned int output_height,
|
| const short *filter);
|
|
|
| +void vp9_filter_block1d4_h8_intrin_ssse3(const unsigned char *src_ptr,
|
| + const unsigned int src_pitch,
|
| + unsigned char *output_ptr,
|
| + unsigned int out_pitch,
|
| + unsigned int output_height,
|
| + const short *filter);  /* Declaration only. This patch calls it in place of vp9_filter_block1d4_h8_ssse3 in the w >= 4 loop of the x_step_q4 == 16 path, with unchanged arguments. NOTE(review): presumably the intrinsics-based implementation; its definition is not visible in this diff -- confirm it is built whenever HAVE_SSSE3 is set. */
|
| +
|
| void vp9_filter_block1d16_v8_avg_ssse3(const unsigned char *src_ptr,
|
| const unsigned int src_pitch,
|
| unsigned char *output_ptr,
|
| @@ -129,7 +175,7 @@
|
| /* Ensure the filter can be compressed to int16_t. */
|
| if (x_step_q4 == 16 && filter_x[3] != 128) {
|
| while (w >= 16) {
|
| - vp9_filter_block1d16_h8_ssse3(src, src_stride,
|
| + vp9_filter_block1d16_h8_intrin_ssse3(src, src_stride,
|
| dst, dst_stride,
|
| h, filter_x);
|
| src += 16;
|
| @@ -137,7 +183,7 @@
|
| w -= 16;
|
| }
|
| while (w >= 8) {
|
| - vp9_filter_block1d8_h8_ssse3(src, src_stride,
|
| + vp9_filter_block1d8_h8_intrin_ssse3(src, src_stride,
|
| dst, dst_stride,
|
| h, filter_x);
|
| src += 8;
|
| @@ -145,7 +191,7 @@
|
| w -= 8;
|
| }
|
| while (w >= 4) {
|
| - vp9_filter_block1d4_h8_ssse3(src, src_stride,
|
| + vp9_filter_block1d4_h8_intrin_ssse3(src, src_stride,
|
| dst, dst_stride,
|
| h, filter_x);
|
| src += 4;
|
| @@ -167,25 +213,25 @@
|
| int w, int h) {
|
| if (y_step_q4 == 16 && filter_y[3] != 128) {
|
| while (w >= 16) {
|
| - vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_y);
|
| + vp9_filter_block1d16_v8_intrin_ssse3(src - src_stride * 3, src_stride,
|
| + dst, dst_stride,
|
| + h, filter_y);
|
| src += 16;
|
| dst += 16;
|
| w -= 16;
|
| }
|
| while (w >= 8) {
|
| - vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_y);
|
| + vp9_filter_block1d8_v8_intrin_ssse3(src - src_stride * 3, src_stride,
|
| + dst, dst_stride,
|
| + h, filter_y);
|
| src += 8;
|
| dst += 8;
|
| w -= 8;
|
| }
|
| while (w >= 4) {
|
| - vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_y);
|
| + vp9_filter_block1d4_v8_intrin_ssse3(src - src_stride * 3, src_stride,
|
| + dst, dst_stride,
|
| + h, filter_y);
|
| src += 4;
|
| dst += 4;
|
| w -= 4;
|
| @@ -206,24 +252,24 @@
|
| if (x_step_q4 == 16 && filter_x[3] != 128) {
|
| while (w >= 16) {
|
| vp9_filter_block1d16_h8_avg_ssse3(src, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_x);
|
| + dst, dst_stride,
|
| + h, filter_x);
|
| src += 16;
|
| dst += 16;
|
| w -= 16;
|
| }
|
| while (w >= 8) {
|
| vp9_filter_block1d8_h8_avg_ssse3(src, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_x);
|
| + dst, dst_stride,
|
| + h, filter_x);
|
| src += 8;
|
| dst += 8;
|
| w -= 8;
|
| }
|
| while (w >= 4) {
|
| vp9_filter_block1d4_h8_avg_ssse3(src, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_x);
|
| + dst, dst_stride,
|
| + h, filter_x);
|
| src += 4;
|
| dst += 4;
|
| w -= 4;
|
| @@ -244,24 +290,24 @@
|
| if (y_step_q4 == 16 && filter_y[3] != 128) {
|
| while (w >= 16) {
|
| vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_y);
|
| + dst, dst_stride,
|
| + h, filter_y);
|
| src += 16;
|
| dst += 16;
|
| w -= 16;
|
| }
|
| while (w >= 8) {
|
| vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_y);
|
| + dst, dst_stride,
|
| + h, filter_y);
|
| src += 8;
|
| dst += 8;
|
| w -= 8;
|
| }
|
| while (w >= 4) {
|
| vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride,
|
| - dst, dst_stride,
|
| - h, filter_y);
|
| + dst, dst_stride,
|
| + h, filter_y);
|
| src += 4;
|
| dst += 4;
|
| w -= 4;
|
|
|