| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright 2014 The Android Open Source Project | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license that can be | |
| 5 * found in the LICENSE file. | |
| 6 */ | |
| 7 | |
| 8 #ifndef SkMath_opts_SSE2_DEFINED | |
| 9 #define SkMath_opts_SSE2_DEFINED | |
| 10 | |
| 11 #include <emmintrin.h> | |
| 12 | |
// SSE2 provides no packed 32-bit integer divide (_mm_div_epi32), so this shim
// emulates one: each lane of a and b is converted to float, divided, and the
// quotient is converted back to a 32-bit integer with truncation.
//
// Callers must keep a and b within the range a float can represent exactly;
// larger magnitudes are rounded by the conversion and the quotient may be off.
static inline __m128i shim_mm_div_epi32(const __m128i& a, const __m128i& b) {
    const __m128 quotient = _mm_div_ps(_mm_cvtepi32_ps(a), _mm_cvtepi32_ps(b));
    return _mm_cvttps_epi32(quotient);
}
| 20 | |
// Four-lane SSE2 bit-by-bit square root. The portable version of SkSqrtBits
// is in SkMath.cpp.
//
// x      four 32-bit inputs, one per lane; each lane is processed independently.
// count  the loop body runs (count + 1) times. Every pass shifts the next two
//        bits of the input into the running remainder and produces one more
//        bit of the root.
// Returns the four roots, one per lane.
//
// The remainder and the trial divisor are compared as UNSIGNED 32-bit values.
// testDiv = 2 * root + 1 reaches 2^31 once root reaches 2^30 (for example
// x = 1 << 30 with count = 30); a signed compare would see that as a negative
// number and wrongly take the subtract/increment branch.
// NOTE(review): the scalar reference is expected to use unsigned arithmetic as
// well -- confirm against SkMath.cpp.
static inline __m128i SkSqrtBits_SSE2(const __m128i& x, int count) {
    const __m128i one128 = _mm_set1_epi32(1);
    // SSE2 only has signed 32-bit compares. Flipping the top bit of both
    // operands maps unsigned order onto signed order.
    const __m128i signBit = _mm_slli_epi32(one128, 31);

    __m128i root = _mm_setzero_si128();
    __m128i remHi = _mm_setzero_si128();
    __m128i remLo = x;

    do {
        root = _mm_slli_epi32(root, 1);

        // Move the top two bits of remLo into the bottom of remHi.
        remHi = _mm_or_si128(_mm_slli_epi32(remHi, 2),
                             _mm_srli_epi32(remLo, 30));
        remLo = _mm_slli_epi32(remLo, 2);

        const __m128i testDiv = _mm_add_epi32(_mm_slli_epi32(root, 1), one128);

        // All-ones in lanes where remHi < testDiv (unsigned), zero elsewhere.
        const __m128i below = _mm_cmplt_epi32(_mm_xor_si128(remHi, signBit),
                                              _mm_xor_si128(testDiv, signBit));

        // Lanes with remHi >= testDiv: remHi -= testDiv and root += 1.
        // The other lanes subtract/add zero and are left unchanged.
        remHi = _mm_sub_epi32(remHi, _mm_andnot_si128(below, testDiv));
        root = _mm_add_epi32(root, _mm_andnot_si128(below, one128));
    } while (--count >= 0);

    return root;
}
| 50 | |
| 51 #endif // SkMath_opts_SSE2_DEFINED | |
| OLD | NEW |