celt/x86/pitch_sse.h - Issue 882843002: Update to opus-HEAD-66611f1.

Unified Diff: celt/x86/pitch_sse.h

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master

Patch Set: Add the contents of Makefile.mips back. Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: celt/x86/pitch_sse.h

diff --git a/celt/x86/pitch_sse.h b/celt/x86/pitch_sse.h

index 695122a5adda014e2ac77a01b6ade6184872ed3a..99d1919a2e62579f40f1d641099bbd7691c9f2f2 100644

--- a/celt/x86/pitch_sse.h

+++ b/celt/x86/pitch_sse.h

@@ -1,4 +1,5 @@

/**

@file pitch_sse.h

@brief Pitch analysis

@@ -32,11 +33,56 @@

#ifndef PITCH_SSE_H

#define PITCH_SSE_H

+#if defined(HAVE_CONFIG_H)

+#include "config.h"

+#endif

+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)

+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)

+void xcorr_kernel_sse4_1(

+ const opus_int16 *x,

+ const opus_int16 *y,

+ opus_val32 sum[4],

+ int len);

+extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(

+ const opus_int16 *x,

+ const opus_int16 *y,

+ opus_val32 sum[4],

+ int len);

+#define OVERRIDE_XCORR_KERNEL

+#define xcorr_kernel(x, y, sum, len, arch) \

+ ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))

+opus_val32 celt_inner_prod_sse4_1(

+ const opus_int16 *x,

+ const opus_int16 *y,

+ int N);

+#endif

+#if defined(OPUS_X86_MAY_HAVE_SSE2)

+opus_val32 celt_inner_prod_sse2(

+ const opus_int16 *x,

+ const opus_int16 *y,

+ int N);

+#endif

+extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(

+ const opus_int16 *x,

+ const opus_int16 *y,

+ int N);

+#define OVERRIDE_CELT_INNER_PROD

+#define celt_inner_prod(x, y, N, arch) \

+ ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))

+#else

#include <xmmintrin.h>

#include "arch.h"

#define OVERRIDE_XCORR_KERNEL

-static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)

+static OPUS_INLINE void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)

{

int j;

__m128 xsum1, xsum2;

@@ -71,6 +117,9 @@ static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, o

_mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));

}

+#define xcorr_kernel(_x, _y, _z, len, arch) \

+ ((void)(arch),xcorr_kernel_sse(_x, _y, _z, len))

#define OVERRIDE_DUAL_INNER_PROD

static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,

int N, opus_val32 *xy1, opus_val32 *xy2)

@@ -101,6 +150,35 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y

}

+#define OVERRIDE_CELT_INNER_PROD

+static OPUS_INLINE opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,

+ int N)

+{

+ int i;

+ float xy;

+ __m128 sum;

+ sum = _mm_setzero_ps();

+ /* FIXME: We should probably go 8-way and use 2 sums. */

+ for (i=0;i<N-3;i+=4)

+ {

+ __m128 xi = _mm_loadu_ps(x+i);

+ __m128 yi = _mm_loadu_ps(y+i);

+ sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));

+ }

+ /* Horizontal sum */

+ sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));

+ sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));

+ _mm_store_ss(&xy, sum);

+ for (;i<N;i++)

+ {

+ xy = MAC16_16(xy, x[i], y[i]);

+ }

+ return xy;

+}

+# define celt_inner_prod(_x, _y, len, arch) \

+ ((void)(arch),celt_inner_prod_sse(_x, _y, len))

#define OVERRIDE_COMB_FILTER_CONST

static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,

opus_val16 g10, opus_val16 g11, opus_val16 g12)

@@ -154,3 +232,4 @@ static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, i

}

#endif

+#endif

« no previous file with comments | « celt/x86/celt_lpc_sse.c ('k') | celt/x86/pitch_sse.c » ('j') | no next file with comments »