| Index: source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
|
| diff --git a/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c b/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
|
| index 166156af7f347ed2e4260292e80d7ca448ca8388..0ac194e92b0fcbe4f5ae4f47e8620bffa0d7131e 100644
|
| --- a/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
|
| +++ b/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
|
| @@ -16,10 +16,18 @@
|
| #include "vpx_ports/mem.h"
|
| #include "vpx/vpx_integer.h"
|
|
|
| -#include "vp9/common/vp9_common.h"
|
| #include "vp9/common/vp9_filter.h"
|
|
|
| -#include "vp9/encoder/vp9_variance.h"
|
| +static const uint8_t bilinear_filters[8][2] = {  /* [offset] -> {f0, f1} */
|
| + { 128, 0, },  /* the two taps of every row sum to 128 */
|
| + { 112, 16, },
|
| + { 96, 32, },
|
| + { 80, 48, },
|
| + { 64, 64, },
|
| + { 48, 80, },
|
| + { 32, 96, },
|
| + { 16, 112, },
|
| +};  /* const: lookup table, only read via const uint8_t *vp9_filter */
|
|
|
| static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
|
| uint8_t *output_ptr,
|
| @@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
|
| int pixel_step,
|
| unsigned int output_height,
|
| unsigned int output_width,
|
| - const int16_t *vp9_filter) {
|
| - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
|
| - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
|
| + const uint8_t *vp9_filter) {
|
| + const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
|
| + const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
|
| unsigned int i;
|
| for (i = 0; i < output_height; ++i) {
|
| const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
|
| @@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
|
| int pixel_step,
|
| unsigned int output_height,
|
| unsigned int output_width,
|
| - const int16_t *vp9_filter) {
|
| - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
|
| - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
|
| + const uint8_t *vp9_filter) {
|
| + const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
|
| + const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
|
| unsigned int i, j;
|
| for (i = 0; i < output_height; ++i) {
|
| for (j = 0; j < output_width; j += 16) {
|
| @@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
|
|
|
| var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
|
| 9, 8,
|
| - BILINEAR_FILTERS_2TAP(xoffset));
|
| + bilinear_filters[xoffset]);
|
| var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
|
| - 8, BILINEAR_FILTERS_2TAP(yoffset));
|
| + 8, bilinear_filters[yoffset]);
|
| return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
|
| }
|
|
|
| @@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
|
|
|
| var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
|
| 17, 16,
|
| - BILINEAR_FILTERS_2TAP(xoffset));
|
| + bilinear_filters[xoffset]);
|
| var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
|
| - 16, BILINEAR_FILTERS_2TAP(yoffset));
|
| + 16, bilinear_filters[yoffset]);
|
| return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
|
| }
|
|
|
| @@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
|
|
|
| var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
|
| 33, 32,
|
| - BILINEAR_FILTERS_2TAP(xoffset));
|
| + bilinear_filters[xoffset]);
|
| var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
|
| - 32, BILINEAR_FILTERS_2TAP(yoffset));
|
| + 32, bilinear_filters[yoffset]);
|
| return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
|
| }
|
|
|
| @@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
|
|
|
| var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
|
| 65, 64,
|
| - BILINEAR_FILTERS_2TAP(xoffset));
|
| + bilinear_filters[xoffset]);
|
| var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
|
| - 64, BILINEAR_FILTERS_2TAP(yoffset));
|
| + 64, bilinear_filters[yoffset]);
|
| return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
|
| }
|
|
|