Index: celt/x86/x86cpu.h |
diff --git a/celt/arm/armcpu.h b/celt/x86/x86cpu.h |
similarity index 50% |
copy from celt/arm/armcpu.h |
copy to celt/x86/x86cpu.h |
index ac5744606e0804a8099f94ab8bf10f34ece6d345..44b3a597634d464bd8eef3ba79241e4d243ba1cf 100644 |
--- a/celt/arm/armcpu.h |
+++ b/celt/x86/x86cpu.h |
@@ -1,6 +1,6 @@ |
-/* Copyright (c) 2010 Xiph.Org Foundation |
- * Copyright (c) 2013 Parrot */ |
-/* |
+/* Copyright (c) 2014, Cisco Systems, INC |
+ Written by XiangMingZhu WeiZhou MinPeng YanWang |
+ |
Redistribution and use in source and binary forms, with or without |
modification, are permitted provided that the following conditions |
are met: |
@@ -25,47 +25,47 @@ |
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
-#if !defined(ARMCPU_H) |
-# define ARMCPU_H |
+#if !defined(X86CPU_H) |
+# define X86CPU_H |
-# if defined(OPUS_ARM_MAY_HAVE_EDSP) |
-# define MAY_HAVE_EDSP(name) name ## _edsp |
+# if defined(OPUS_X86_MAY_HAVE_SSE2) |
+# define MAY_HAVE_SSE2(name) name ## _sse2 |
# else |
-# define MAY_HAVE_EDSP(name) name ## _c |
+# define MAY_HAVE_SSE2(name) name ## _c |
# endif |
-# if defined(OPUS_ARM_MAY_HAVE_MEDIA) |
-# define MAY_HAVE_MEDIA(name) name ## _media |
+# if defined(OPUS_X86_MAY_HAVE_SSE4_1) |
+# define MAY_HAVE_SSE4_1(name) name ## _sse4_1 |
# else |
-# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name) |
+# define MAY_HAVE_SSE4_1(name) name ## _c |
# endif |
-# if defined(OPUS_ARM_MAY_HAVE_NEON) |
-# define MAY_HAVE_NEON(name) name ## _neon |
-# else |
-# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name) |
+# if defined(OPUS_HAVE_RTCD) |
+int opus_select_arch(void); |
# endif |
-# if defined(OPUS_ARM_PRESUME_EDSP) |
-# define PRESUME_EDSP(name) name ## _edsp |
-# else |
-# define PRESUME_EDSP(name) name ## _c |
-# endif |
+/*gcc appears to emit MOVDQAs to load the argument of an _mm_cvtepi8_epi32() |
+ or _mm_cvtepi16_epi32() when optimizations are disabled, even though the |
+ actual PMOVSXBD/PMOVSXWD instructions take an m32 or m64. Unlike a normal |
+ memory reference, a MOVDQA requires 16-byte alignment and loads a full 16 |
+ bytes (instead of 4 or 8), possibly reading out of bounds. |
-# if defined(OPUS_ARM_PRESUME_MEDIA) |
-# define PRESUME_MEDIA(name) name ## _media |
-# else |
-# define PRESUME_MEDIA(name) PRESUME_EDSP(name) |
-# endif |
+ We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or |
+ _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 |
+ reference in the PMOVSXBD/PMOVSXWD instruction itself, but gcc is not smart |
+ enough to optimize this out when optimizations ARE enabled.*/ |
+# if !defined(__OPTIMIZE__) |
+# define OP_CVTEPI8_EPI32_M32(x) \ |
+ (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) |
-# if defined(OPUS_ARM_PRESUME_NEON) |
-# define PRESUME_NEON(name) name ## _neon |
+# define OP_CVTEPI16_EPI32_M64(x) \ |
+ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) |
# else |
-# define PRESUME_NEON(name) PRESUME_MEDIA(name) |
-# endif |
+# define OP_CVTEPI8_EPI32_M32(x) \ |
+ (_mm_cvtepi8_epi32(*(__m128i *)(x))) |
-# if defined(OPUS_HAVE_RTCD) |
-int opus_select_arch(void); |
+# define OP_CVTEPI16_EPI32_M64(x) \ |
+ (_mm_cvtepi16_epi32(*(__m128i *)(x))) |
# endif |
#endif |