Index: source/libvpx/vp9/common/x86/vp9_asm_stubs.c |
=================================================================== |
--- source/libvpx/vp9/common/x86/vp9_asm_stubs.c (revision 223100) |
+++ source/libvpx/vp9/common/x86/vp9_asm_stubs.c (working copy) |
@@ -13,6 +13,7 @@ |
#include "./vpx_config.h" |
#include "./vp9_rtcd.h" |
#include "vpx_ports/mem.h" |
+ |
/////////////////////////////////////////////////////////////////////////// |
// the mmx function that does the bilinear filtering and var calculation // |
// int one pass // |
@@ -36,6 +37,8 @@ |
{ 8, 8, 8, 8, 120, 120, 120, 120 } |
}; |
+ |
+int num_func_entry = 0; |
#if HAVE_SSSE3 |
void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr, |
const unsigned int src_pitch, |
@@ -44,6 +47,13 @@ |
unsigned int output_height, |
const short *filter); |
+void vp9_filter_block1d16_v8_intrin_ssse3(const unsigned char *src_ptr, |
+ const unsigned int src_pitch, |
+ unsigned char *output_ptr, |
+ unsigned int out_pitch, |
+ unsigned int output_height, |
+ const short *filter); |
+ |
void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr, |
const unsigned int src_pitch, |
unsigned char *output_ptr, |
@@ -51,6 +61,13 @@ |
unsigned int output_height, |
const short *filter); |
+void vp9_filter_block1d16_h8_intrin_ssse3(const unsigned char *src_ptr, |
+ const unsigned int src_pitch, |
+ unsigned char *output_ptr, |
+ unsigned int out_pitch, |
+ unsigned int output_height, |
+ const short *filter); |
+ |
void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr, |
const unsigned int src_pitch, |
unsigned char *output_ptr, |
@@ -58,6 +75,13 @@ |
unsigned int output_height, |
const short *filter); |
+void vp9_filter_block1d8_v8_intrin_ssse3(const unsigned char *src_ptr, |
+ const unsigned int src_pitch, |
+ unsigned char *output_ptr, |
+ unsigned int out_pitch, |
+ unsigned int output_height, |
+ const short *filter); |
+ |
void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr, |
const unsigned int src_pitch, |
unsigned char *output_ptr, |
@@ -65,6 +89,13 @@ |
unsigned int output_height, |
const short *filter); |
+void vp9_filter_block1d8_h8_intrin_ssse3(const unsigned char *src_ptr, |
+ const unsigned int src_pitch, |
+ unsigned char *output_ptr, |
+ unsigned int out_pitch, |
+ unsigned int output_height, |
+ const short *filter); |
+ |
void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr, |
const unsigned int src_pitch, |
unsigned char *output_ptr, |
@@ -72,6 +103,14 @@ |
unsigned int output_height, |
const short *filter); |
+void vp9_filter_block1d4_v8_intrin_ssse3(const unsigned char *src_ptr, |
+ const unsigned int src_pitch, |
+ unsigned char *output_ptr, |
+ unsigned int out_pitch, |
+ unsigned int output_height, |
+ const short *filter); |
+ |
+ |
void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr, |
const unsigned int src_pitch, |
unsigned char *output_ptr, |
@@ -79,6 +118,13 @@ |
unsigned int output_height, |
const short *filter); |
+void vp9_filter_block1d4_h8_intrin_ssse3(const unsigned char *src_ptr, |
+ const unsigned int src_pitch, |
+ unsigned char *output_ptr, |
+ unsigned int out_pitch, |
+ unsigned int output_height, |
+ const short *filter); |
+ |
void vp9_filter_block1d16_v8_avg_ssse3(const unsigned char *src_ptr, |
const unsigned int src_pitch, |
unsigned char *output_ptr, |
@@ -129,7 +175,7 @@ |
/* Ensure the filter can be compressed to int16_t. */ |
if (x_step_q4 == 16 && filter_x[3] != 128) { |
while (w >= 16) { |
- vp9_filter_block1d16_h8_ssse3(src, src_stride, |
+ vp9_filter_block1d16_h8_intrin_ssse3(src, src_stride, |
dst, dst_stride, |
h, filter_x); |
src += 16; |
@@ -137,7 +183,7 @@ |
w -= 16; |
} |
while (w >= 8) { |
- vp9_filter_block1d8_h8_ssse3(src, src_stride, |
+ vp9_filter_block1d8_h8_intrin_ssse3(src, src_stride, |
dst, dst_stride, |
h, filter_x); |
src += 8; |
@@ -145,7 +191,7 @@ |
w -= 8; |
} |
while (w >= 4) { |
- vp9_filter_block1d4_h8_ssse3(src, src_stride, |
+ vp9_filter_block1d4_h8_intrin_ssse3(src, src_stride, |
dst, dst_stride, |
h, filter_x); |
src += 4; |
@@ -167,25 +213,25 @@ |
int w, int h) { |
if (y_step_q4 == 16 && filter_y[3] != 128) { |
while (w >= 16) { |
- vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride, |
- dst, dst_stride, |
- h, filter_y); |
+ vp9_filter_block1d16_v8_intrin_ssse3(src - src_stride * 3, src_stride, |
+ dst, dst_stride, |
+ h, filter_y); |
src += 16; |
dst += 16; |
w -= 16; |
} |
while (w >= 8) { |
- vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride, |
- dst, dst_stride, |
- h, filter_y); |
+ vp9_filter_block1d8_v8_intrin_ssse3(src - src_stride * 3, src_stride, |
+ dst, dst_stride, |
+ h, filter_y); |
src += 8; |
dst += 8; |
w -= 8; |
} |
while (w >= 4) { |
- vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride, |
- dst, dst_stride, |
- h, filter_y); |
+ vp9_filter_block1d4_v8_intrin_ssse3(src - src_stride * 3, src_stride, |
+ dst, dst_stride, |
+ h, filter_y); |
src += 4; |
dst += 4; |
w -= 4; |
@@ -206,24 +252,24 @@ |
if (x_step_q4 == 16 && filter_x[3] != 128) { |
while (w >= 16) { |
vp9_filter_block1d16_h8_avg_ssse3(src, src_stride, |
- dst, dst_stride, |
- h, filter_x); |
+ dst, dst_stride, |
+ h, filter_x); |
src += 16; |
dst += 16; |
w -= 16; |
} |
while (w >= 8) { |
vp9_filter_block1d8_h8_avg_ssse3(src, src_stride, |
- dst, dst_stride, |
- h, filter_x); |
+ dst, dst_stride, |
+ h, filter_x); |
src += 8; |
dst += 8; |
w -= 8; |
} |
while (w >= 4) { |
vp9_filter_block1d4_h8_avg_ssse3(src, src_stride, |
- dst, dst_stride, |
- h, filter_x); |
+ dst, dst_stride, |
+ h, filter_x); |
src += 4; |
dst += 4; |
w -= 4; |
@@ -244,24 +290,24 @@ |
if (y_step_q4 == 16 && filter_y[3] != 128) { |
while (w >= 16) { |
vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride, |
- dst, dst_stride, |
- h, filter_y); |
+ dst, dst_stride, |
+ h, filter_y); |
src += 16; |
dst += 16; |
w -= 16; |
} |
while (w >= 8) { |
vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride, |
- dst, dst_stride, |
- h, filter_y); |
+ dst, dst_stride, |
+ h, filter_y); |
src += 8; |
dst += 8; |
w -= 8; |
} |
while (w >= 4) { |
vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride, |
- dst, dst_stride, |
- h, filter_y); |
+ dst, dst_stride, |
+ h, filter_y); |
src += 4; |
dst += 4; |
w -= 4; |