OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/base/vector_math.h" | 5 #include "media/base/vector_math.h" |
6 #include "media/base/vector_math_testing.h" | 6 #include "media/base/vector_math_testing.h" |
7 | 7 |
8 #include "base/cpu.h" | 8 #include "base/cpu.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "build/build_config.h" | |
11 | |
12 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | |
13 #include <xmmintrin.h> | |
14 #endif | |
15 | 10 |
16 namespace media { | 11 namespace media { |
17 namespace vector_math { | 12 namespace vector_math { |
18 | 13 |
19 void FMAC(const float src[], float scale, int len, float dest[]) { | 14 void FMAC(const float src[], float scale, int len, float dest[]) { |
20 // Ensure |src| and |dest| are 16-byte aligned. | 15 // Ensure |src| and |dest| are 16-byte aligned. |
21 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); | 16 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); |
22 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); | 17 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); |
23 | 18 |
24 // Rely on function level static initialization to keep VectorFMACProc | 19 // Rely on function level static initialization to keep VectorFMACProc |
25 // selection thread safe. | 20 // selection thread safe. |
26 typedef void (*VectorFMACProc)(const float src[], float scale, int len, | 21 typedef void (*VectorFMACProc)(const float src[], float scale, int len, |
27 float dest[]); | 22 float dest[]); |
28 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | 23 #if defined(ARCH_CPU_X86_FAMILY) |
| 24 #if defined(__SSE__) |
| 25 static const VectorFMACProc kVectorFMACProc = FMAC_SSE; |
| 26 #else |
29 static const VectorFMACProc kVectorFMACProc = | 27 static const VectorFMACProc kVectorFMACProc = |
30 base::CPU().has_sse() ? FMAC_SSE : FMAC_C; | 28 base::CPU().has_sse() ? FMAC_SSE : FMAC_C; |
| 29 #endif |
31 #else | 30 #else |
32 static const VectorFMACProc kVectorFMACProc = FMAC_C; | 31 static const VectorFMACProc kVectorFMACProc = FMAC_C; |
33 #endif | 32 #endif |
34 | 33 |
35 return kVectorFMACProc(src, scale, len, dest); | 34 return kVectorFMACProc(src, scale, len, dest); |
36 } | 35 } |
37 | 36 |
38 void FMAC_C(const float src[], float scale, int len, float dest[]) { | 37 void FMAC_C(const float src[], float scale, int len, float dest[]) { |
39 for (int i = 0; i < len; ++i) | 38 for (int i = 0; i < len; ++i) |
40 dest[i] += src[i] * scale; | 39 dest[i] += src[i] * scale; |
41 } | 40 } |
42 | 41 |
43 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | |
44 void FMAC_SSE(const float src[], float scale, int len, float dest[]) { | |
45 __m128 m_scale = _mm_set_ps1(scale); | |
46 int rem = len % 4; | |
47 for (int i = 0; i < len - rem; i += 4) { | |
48 _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i), | |
49 _mm_mul_ps(_mm_load_ps(src + i), m_scale))); | |
50 } | |
51 | |
52 // Handle any remaining values that wouldn't fit in an SSE pass. | |
53 if (rem) | |
54 FMAC_C(src + len - rem, scale, rem, dest + len - rem); | |
55 } | |
56 #endif | |
57 | |
58 } // namespace vector_math | 42 } // namespace vector_math |
59 } // namespace media | 43 } // namespace media |
OLD | NEW |