| Index: celt/x86/x86cpu.h
|
| diff --git a/celt/arm/armcpu.h b/celt/x86/x86cpu.h
|
| similarity index 50%
|
| copy from celt/arm/armcpu.h
|
| copy to celt/x86/x86cpu.h
|
| index ac5744606e0804a8099f94ab8bf10f34ece6d345..44b3a597634d464bd8eef3ba79241e4d243ba1cf 100644
|
| --- a/celt/arm/armcpu.h
|
| +++ b/celt/x86/x86cpu.h
|
| @@ -1,6 +1,6 @@
|
| -/* Copyright (c) 2010 Xiph.Org Foundation
|
| - * Copyright (c) 2013 Parrot */
|
| -/*
|
| +/* Copyright (c) 2014, Cisco Systems, INC
|
| + Written by XiangMingZhu WeiZhou MinPeng YanWang
|
| +
|
| Redistribution and use in source and binary forms, with or without
|
| modification, are permitted provided that the following conditions
|
| are met:
|
| @@ -25,47 +25,47 @@
|
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| */
|
|
|
| -#if !defined(ARMCPU_H)
|
| -# define ARMCPU_H
|
| +#if !defined(X86CPU_H)
|
| +# define X86CPU_H
|
|
|
| -# if defined(OPUS_ARM_MAY_HAVE_EDSP)
|
| -# define MAY_HAVE_EDSP(name) name ## _edsp
|
| +# if defined(OPUS_X86_MAY_HAVE_SSE2)
|
| +# define MAY_HAVE_SSE2(name) name ## _sse2
|
| # else
|
| -# define MAY_HAVE_EDSP(name) name ## _c
|
| +# define MAY_HAVE_SSE2(name) name ## _c
|
| # endif
|
|
|
| -# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
|
| -# define MAY_HAVE_MEDIA(name) name ## _media
|
| +# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
|
| +# define MAY_HAVE_SSE4_1(name) name ## _sse4_1
|
| # else
|
| -# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name)
|
| +# define MAY_HAVE_SSE4_1(name) name ## _c
|
| # endif
|
|
|
| -# if defined(OPUS_ARM_MAY_HAVE_NEON)
|
| -# define MAY_HAVE_NEON(name) name ## _neon
|
| -# else
|
| -# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
|
| +# if defined(OPUS_HAVE_RTCD)
|
| +int opus_select_arch(void);
|
| # endif
|
|
|
| -# if defined(OPUS_ARM_PRESUME_EDSP)
|
| -# define PRESUME_EDSP(name) name ## _edsp
|
| -# else
|
| -# define PRESUME_EDSP(name) name ## _c
|
| -# endif
|
| +/*gcc appears to emit MOVDQAs to load the argument of an _mm_cvtepi8_epi32()
|
| + or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
|
| + actual PMOVSXBD/PMOVSXWD instructions take an m32 or m64. Unlike a normal memory
|
| + reference, these require 16-byte alignment and load a full 16 bytes (instead
|
| + of 4 or 8), possibly reading out of bounds.
|
|
|
| -# if defined(OPUS_ARM_PRESUME_MEDIA)
|
| -# define PRESUME_MEDIA(name) name ## _media
|
| -# else
|
| -# define PRESUME_MEDIA(name) PRESUME_EDSP(name)
|
| -# endif
|
| + We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
|
| + _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
|
| + reference in the PMOVSXBD/PMOVSXWD instruction itself, but gcc is not smart enough to
|
| + optimize this out when optimizations ARE enabled.*/
|
| +# if !defined(__OPTIMIZE__)
|
| +# define OP_CVTEPI8_EPI32_M32(x) \
|
| + (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
|
|
|
| -# if defined(OPUS_ARM_PRESUME_NEON)
|
| -# define PRESUME_NEON(name) name ## _neon
|
| +# define OP_CVTEPI16_EPI32_M64(x) \
|
| + (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
|
| # else
|
| -# define PRESUME_NEON(name) PRESUME_MEDIA(name)
|
| -# endif
|
| +# define OP_CVTEPI8_EPI32_M32(x) \
|
| + (_mm_cvtepi8_epi32(*(__m128i *)(x)))
|
|
|
| -# if defined(OPUS_HAVE_RTCD)
|
| -int opus_select_arch(void);
|
| +# define OP_CVTEPI16_EPI32_M64(x) \
|
| + (_mm_cvtepi16_epi32(*(__m128i *)(x)))
|
| # endif
|
|
|
| #endif
|
|
|