OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/base/vector_math.h" | 5 #include "media/base/vector_math.h" |
6 #include "media/base/vector_math_testing.h" | 6 #include "media/base/vector_math_testing.h" |
7 | 7 |
8 #include "base/cpu.h" | 8 #include "base/cpu.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "build/build_config.h" | 10 #include "build/build_config.h" |
11 | 11 |
12 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | |
13 #include <xmmintrin.h> | |
14 #endif | |
15 | |
16 namespace media { | 12 namespace media { |
17 namespace vector_math { | 13 namespace vector_math { |
18 | 14 |
19 void FMAC(const float src[], float scale, int len, float dest[]) { | 15 void FMAC(const float src[], float scale, int len, float dest[]) { |
20 // Ensure |src| and |dest| are 16-byte aligned. | 16 // Ensure |src| and |dest| are 16-byte aligned. |
21 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); | 17 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); |
22 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); | 18 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); |
23 | 19 |
24 // Rely on function level static initialization to keep VectorFMACProc | 20 // Rely on function level static initialization to keep VectorFMACProc |
25 // selection thread safe. | 21 // selection thread safe. |
26 typedef void (*VectorFMACProc)(const float src[], float scale, int len, | 22 typedef void (*VectorFMACProc)(const float src[], float scale, int len, |
27 float dest[]); | 23 float dest[]); |
28 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | 24 #if defined(ARCH_CPU_X86_FAMILY) |
29 static const VectorFMACProc kVectorFMACProc = | 25 static const VectorFMACProc kVectorFMACProc = |
Mark Mentovai
2013/03/05 21:32:31
#if defined(OS_MACOSX), you can use FMAC_SSE directly…
DaleCurtis
2013/03/05 21:51:37
Done w/ __SSE__.
| |
30 base::CPU().has_sse() ? FMAC_SSE : FMAC_C; | 26 base::CPU().has_sse() ? FMAC_SSE : FMAC_C; |
31 #else | 27 #else |
32 static const VectorFMACProc kVectorFMACProc = FMAC_C; | 28 static const VectorFMACProc kVectorFMACProc = FMAC_C; |
33 #endif | 29 #endif |
34 | 30 |
35 return kVectorFMACProc(src, scale, len, dest); | 31 return kVectorFMACProc(src, scale, len, dest); |
36 } | 32 } |
37 | 33 |
38 void FMAC_C(const float src[], float scale, int len, float dest[]) { | 34 void FMAC_C(const float src[], float scale, int len, float dest[]) { |
39 for (int i = 0; i < len; ++i) | 35 for (int i = 0; i < len; ++i) |
40 dest[i] += src[i] * scale; | 36 dest[i] += src[i] * scale; |
41 } | 37 } |
42 | 38 |
43 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | |
44 void FMAC_SSE(const float src[], float scale, int len, float dest[]) { | |
45 __m128 m_scale = _mm_set_ps1(scale); | |
46 int rem = len % 4; | |
47 for (int i = 0; i < len - rem; i += 4) { | |
48 _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i), | |
49 _mm_mul_ps(_mm_load_ps(src + i), m_scale))); | |
50 } | |
51 | |
52 // Handle any remaining values that wouldn't fit in an SSE pass. | |
53 if (rem) | |
54 FMAC_C(src + len - rem, scale, rem, dest + len - rem); | |
55 } | |
56 #endif | |
57 | |
58 } // namespace vector_math | 39 } // namespace vector_math |
59 } // namespace media | 40 } // namespace media |
OLD | NEW |