| Index: source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
|
| diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
|
| index 564b7955e5b9b54c2c803e653b234351bd642a3a..cff4fcbdce0a6ba61229495594da70042ccfc0ac 100644
|
| --- a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
|
| +++ b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
|
| @@ -10,6 +10,8 @@
|
|
|
| #include <assert.h>
|
| #include <emmintrin.h> // SSE2
|
| +
|
| +#include "./vp9_rtcd.h"
|
| #include "vp9/common/vp9_idct.h" // for cospi constants
|
| #include "vp9/encoder/vp9_dct.h"
|
| #include "vp9/encoder/x86/vp9_dct_sse2.h"
|
| @@ -96,7 +98,7 @@ static INLINE void transpose_4x4(__m128i *res) {
|
| res[3] = _mm_unpackhi_epi64(res[2], res[2]);
|
| }
|
|
|
| -void fdct4_sse2(__m128i *in) {
|
| +static void fdct4_sse2(__m128i *in) {
|
| const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
|
| const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
|
| const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
|
| @@ -129,7 +131,7 @@ void fdct4_sse2(__m128i *in) {
|
| transpose_4x4(in);
|
| }
|
|
|
| -void fadst4_sse2(__m128i *in) {
|
| +static void fadst4_sse2(__m128i *in) {
|
| const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
|
| const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
|
| const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
|
| @@ -831,7 +833,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
|
| // 07 17 27 37 47 57 67 77
|
| }
|
|
|
| -void fdct8_sse2(__m128i *in) {
|
| +static void fdct8_sse2(__m128i *in) {
|
| // constants
|
| const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
|
| const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
|
| @@ -971,7 +973,7 @@ void fdct8_sse2(__m128i *in) {
|
| array_transpose_8x8(in, in);
|
| }
|
|
|
| -void fadst8_sse2(__m128i *in) {
|
| +static void fadst8_sse2(__m128i *in) {
|
| // Constants
|
| const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
|
| const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
|
| @@ -1353,7 +1355,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
|
| right_shift_8x8(res1 + 8, 2);
|
| }
|
|
|
| -void fdct16_8col(__m128i *in) {
|
| +static void fdct16_8col(__m128i *in) {
|
| // perform 16x16 1-D DCT for 8 columns
|
| __m128i i[8], s[8], p[8], t[8], u[16], v[16];
|
| const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
|
| @@ -1675,7 +1677,7 @@ void fdct16_8col(__m128i *in) {
|
| in[15] = _mm_packs_epi32(v[14], v[15]);
|
| }
|
|
|
| -void fadst16_8col(__m128i *in) {
|
| +static void fadst16_8col(__m128i *in) {
|
| // perform 16x16 1-D ADST for 8 columns
|
| __m128i s[16], x[16], u[32], v[32];
|
| const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
|
| @@ -2145,13 +2147,13 @@ void fadst16_8col(__m128i *in) {
|
| in[15] = _mm_sub_epi16(kZero, s[1]);
|
| }
|
|
|
| -void fdct16_sse2(__m128i *in0, __m128i *in1) {
|
| +static void fdct16_sse2(__m128i *in0, __m128i *in1) {
|
| fdct16_8col(in0);
|
| fdct16_8col(in1);
|
| array_transpose_16x16(in0, in1);
|
| }
|
|
|
| -void fadst16_sse2(__m128i *in0, __m128i *in1) {
|
| +static void fadst16_sse2(__m128i *in0, __m128i *in1) {
|
| fadst16_8col(in0);
|
| fadst16_8col(in1);
|
| array_transpose_16x16(in0, in1);
|
| @@ -2334,7 +2336,7 @@ void vp9_highbd_fht8x8_sse2(const int16_t *input, tran_low_t *output,
|
| }
|
| }
|
|
|
| -void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
|
| +void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output,
|
| int stride, int tx_type) {
|
| if (tx_type == DCT_DCT) {
|
| vp9_highbd_fdct16x16_sse2(input, output, stride);
|
| @@ -2368,8 +2370,8 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
|
|
|
| /*
|
| * The DCTnxn functions are defined using the macros below. The main code for
|
| - * them is in separate files (vp9/encoder/x86/vp9_dct_impl_sse2.c &
|
| - * vp9/encoder/x86/vp9_dct32x32_sse2.c) which are used by both the 8 bit code
|
| + * them is in separate files (vp9/encoder/x86/vp9_dct_sse2_impl.h &
|
| + * vp9/encoder/x86/vp9_dct32x32_sse2_impl.h) which are used by both the 8 bit code
|
| * and the high bit depth code.
|
| */
|
|
|
| @@ -2378,20 +2380,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
|
| #define FDCT4x4_2D vp9_fdct4x4_sse2
|
| #define FDCT8x8_2D vp9_fdct8x8_sse2
|
| #define FDCT16x16_2D vp9_fdct16x16_sse2
|
| -#include "vp9/encoder/x86/vp9_dct_impl_sse2.c"
|
| +#include "vp9/encoder/x86/vp9_dct_sse2_impl.h"
|
| #undef FDCT4x4_2D
|
| #undef FDCT8x8_2D
|
| #undef FDCT16x16_2D
|
|
|
| #define FDCT32x32_2D vp9_fdct32x32_rd_sse2
|
| #define FDCT32x32_HIGH_PRECISION 0
|
| -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c"
|
| +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h"
|
| #undef FDCT32x32_2D
|
| #undef FDCT32x32_HIGH_PRECISION
|
|
|
| #define FDCT32x32_2D vp9_fdct32x32_sse2
|
| #define FDCT32x32_HIGH_PRECISION 1
|
| -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
|
| +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
|
| #undef FDCT32x32_2D
|
| #undef FDCT32x32_HIGH_PRECISION
|
|
|
| @@ -2405,20 +2407,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
|
| #define FDCT4x4_2D vp9_highbd_fdct4x4_sse2
|
| #define FDCT8x8_2D vp9_highbd_fdct8x8_sse2
|
| #define FDCT16x16_2D vp9_highbd_fdct16x16_sse2
|
| -#include "vp9/encoder/x86/vp9_dct_impl_sse2.c" // NOLINT
|
| +#include "vp9/encoder/x86/vp9_dct_sse2_impl.h" // NOLINT
|
| #undef FDCT4x4_2D
|
| #undef FDCT8x8_2D
|
| #undef FDCT16x16_2D
|
|
|
| #define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2
|
| #define FDCT32x32_HIGH_PRECISION 0
|
| -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
|
| +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
|
| #undef FDCT32x32_2D
|
| #undef FDCT32x32_HIGH_PRECISION
|
|
|
| #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2
|
| #define FDCT32x32_HIGH_PRECISION 1
|
| -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
|
| +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
|
| #undef FDCT32x32_2D
|
| #undef FDCT32x32_HIGH_PRECISION
|
|
|
|
|