Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/base/vector_math.h" | 5 #include "media/base/vector_math.h" |
| 6 #include "media/base/vector_math_testing.h" | 6 #include "media/base/vector_math_testing.h" |
| 7 | 7 |
| 8 #include "base/cpu.h" | 8 #include "base/cpu.h" |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "build/build_config.h" | 10 #include "build/build_config.h" |
| 11 | 11 |
| 12 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | |
| 13 #include <xmmintrin.h> | |
| 14 #endif | |
| 15 | |
| 16 namespace media { | 12 namespace media { |
| 17 namespace vector_math { | 13 namespace vector_math { |
| 18 | 14 |
| 19 void FMAC(const float src[], float scale, int len, float dest[]) { | 15 void FMAC(const float src[], float scale, int len, float dest[]) { |
| 20 // Ensure |src| and |dest| are 16-byte aligned. | 16 // Ensure |src| and |dest| are 16-byte aligned. |
| 21 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); | 17 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) & (kRequiredAlignment - 1)); |
| 22 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); | 18 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & (kRequiredAlignment - 1)); |
| 23 | 19 |
| 24 // Rely on function level static initialization to keep VectorFMACProc | 20 // Rely on function level static initialization to keep VectorFMACProc |
| 25 // selection thread safe. | 21 // selection thread safe. |
| 26 typedef void (*VectorFMACProc)(const float src[], float scale, int len, | 22 typedef void (*VectorFMACProc)(const float src[], float scale, int len, |
| 27 float dest[]); | 23 float dest[]); |
| 28 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | 24 #if defined(ARCH_CPU_X86_FAMILY) |
| 29 static const VectorFMACProc kVectorFMACProc = | 25 static const VectorFMACProc kVectorFMACProc = |
| **Mark Mentovai** (2013/03/05 21:32:31): `#if defined(OS_MACOSX)`, you can use `FMAC_SSE` direc…<br>**DaleCurtis** (2013/03/05 21:51:37): Done w/ `__SSE__`. | |
| 30 base::CPU().has_sse() ? FMAC_SSE : FMAC_C; | 26 base::CPU().has_sse() ? FMAC_SSE : FMAC_C; |
| 31 #else | 27 #else |
| 32 static const VectorFMACProc kVectorFMACProc = FMAC_C; | 28 static const VectorFMACProc kVectorFMACProc = FMAC_C; |
| 33 #endif | 29 #endif |
| 34 | 30 |
| 35 return kVectorFMACProc(src, scale, len, dest); | 31 return kVectorFMACProc(src, scale, len, dest); |
| 36 } | 32 } |
| 37 | 33 |
| 38 void FMAC_C(const float src[], float scale, int len, float dest[]) { | 34 void FMAC_C(const float src[], float scale, int len, float dest[]) { |
| 39 for (int i = 0; i < len; ++i) | 35 for (int i = 0; i < len; ++i) |
| 40 dest[i] += src[i] * scale; | 36 dest[i] += src[i] * scale; |
| 41 } | 37 } |
| 42 | 38 |
| 43 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) | |
| 44 void FMAC_SSE(const float src[], float scale, int len, float dest[]) { | |
| 45 __m128 m_scale = _mm_set_ps1(scale); | |
| 46 int rem = len % 4; | |
| 47 for (int i = 0; i < len - rem; i += 4) { | |
| 48 _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i), | |
| 49 _mm_mul_ps(_mm_load_ps(src + i), m_scale))); | |
| 50 } | |
| 51 | |
| 52 // Handle any remaining values that wouldn't fit in an SSE pass. | |
| 53 if (rem) | |
| 54 FMAC_C(src + len - rem, scale, rem, dest + len - rem); | |
| 55 } | |
| 56 #endif | |
| 57 | |
| 58 } // namespace vector_math | 39 } // namespace vector_math |
| 59 } // namespace media | 40 } // namespace media |
| OLD | NEW |