Index: src/opts/SkNx_sse.h |
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h |
index 9751c4db9141246681d8905bbaf2f5220fb293c2..65d9873c5c407c780903e3e6ae4fb1a3ccc89d1e 100644 |
--- a/src/opts/SkNx_sse.h |
+++ b/src/opts/SkNx_sse.h |
@@ -8,7 +8,6 @@ |
#ifndef SkNx_sse_DEFINED |
#define SkNx_sse_DEFINED |
-#include "SkCpu.h" |
#include <immintrin.h> |
// This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. |
@@ -91,15 +90,9 @@ public: |
SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } |
SkNx floor() const { |
- if (SkCpu::Supports(SkCpu::SSE41)) { |
- __m128 r; |
- #if defined(__GNUC__) || defined(__clang__) |
- asm("roundps $0x1, %[fVec], %[r]" : [r]"=x"(r) : [fVec]"x"(fVec)); |
- #else |
- r = _mm_floor_ps(fVec); |
- #endif |
- return r; |
- } |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
+ return _mm_floor_ps(fVec); |
+ #else |
// Emulate _mm_floor_ps() with SSE2: |
// - roundtrip through integers via truncation |
// - subtract 1 if that's too big (possible for negative values). |
@@ -108,6 +101,7 @@ public: |
__m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(fVec)); |
__m128 too_big = _mm_cmpgt_ps(roundtrip, fVec); |
return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f))); |
+ #endif |
} |
SkNx sqrt() const { return _mm_sqrt_ps (fVec); } |
@@ -124,12 +118,12 @@ public: |
bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); } |
SkNx thenElse(const SkNx& t, const SkNx& e) const { |
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
return _mm_blendv_ps(e.fVec, t.fVec, fVec); |
-#else |
+ #else |
return _mm_or_ps(_mm_and_ps (fVec, t.fVec), |
_mm_andnot_ps(fVec, e.fVec)); |
-#endif |
+ #endif |
} |
__m128 fVec; |