| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 /* | 8 /* |
| 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q | 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q |
| 10 */ | 10 */ |
| 11 | 11 |
| 12 #ifndef SkBlend_opts_DEFINED | 12 #ifndef SkBlend_opts_DEFINED |
| 13 #define SkBlend_opts_DEFINED | 13 #define SkBlend_opts_DEFINED |
| 14 | 14 |
| 15 #include "SkNx.h" | 15 #include "SkNx.h" |
| 16 #include "SkPM4fPriv.h" | 16 #include "SkPM4fPriv.h" |
| 17 | 17 |
| 18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 19 #include <immintrin.h> | 19 #include <immintrin.h> |
| 20 #endif | 20 #endif |
| 21 | 21 |
| 22 namespace SK_OPTS_NS { | 22 namespace SK_OPTS_NS { |
| 23 | 23 |
| 24 static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel)
{ | 24 static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { |
| 25 if ((~pixel & 0xFF000000) == 0) { | 25 if (src >= 0xFF000000) { |
| 26 *dst = pixel; | 26 *dst = src; |
| 27 } else if ((pixel & 0xFF000000) != 0) { | 27 return; |
| 28 srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel))); | |
| 29 } | 28 } |
| 29 auto d = Sk4f_fromS32(*dst), |
| 30 s = Sk4f_fromS32( src); |
| 31 *dst = Sk4f_toS32(s + d * (1.0f - s[3])); |
| 30 } | 32 } |
| 31 | 33 |
| 32 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { | 34 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { |
| 33 srcover_srgb8888_srgb_1(dst++, *src++); | 35 srcover_srgb_srgb_1(dst++, *src++); |
| 34 srcover_srgb8888_srgb_1(dst++, *src++); | 36 srcover_srgb_srgb_1(dst++, *src++); |
| 35 srcover_srgb8888_srgb_1(dst++, *src++); | 37 srcover_srgb_srgb_1(dst++, *src++); |
| 36 srcover_srgb8888_srgb_1(dst, *src); | 38 srcover_srgb_srgb_1(dst , *src ); |
| 37 } | 39 } |
| 38 | 40 |
| 39 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 41 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 40 | 42 |
| 41 static inline __m128i load(const uint32_t* p) { | 43 static inline __m128i load(const uint32_t* p) { |
| 42 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); | 44 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); |
| 43 } | 45 } |
| 44 | 46 |
| 45 static inline void store(uint32_t* p, __m128i v) { | 47 static inline void store(uint32_t* p, __m128i v) { |
| 46 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); | 48 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 80 srcover_srgb_srgb_4(dst, dst + delta); | 82 srcover_srgb_srgb_4(dst, dst + delta); |
| 81 dst += 4; | 83 dst += 4; |
| 82 } while (dst < end | 84 } while (dst < end |
| 83 && _mm_testnzc_si128(pixels = load(dst + delta)
, alphaMask)); | 85 && _mm_testnzc_si128(pixels = load(dst + delta)
, alphaMask)); |
| 84 src += dst - start; | 86 src += dst - start; |
| 85 } | 87 } |
| 86 } | 88 } |
| 87 | 89 |
| 88 count = count & 3; | 90 count = count & 3; |
| 89 while (count-- > 0) { | 91 while (count-- > 0) { |
| 90 srcover_srgb8888_srgb_1(dst++, *src++); | 92 srcover_srgb_srgb_1(dst++, *src++); |
| 91 } | 93 } |
| 92 } | 94 } |
| 93 } | 95 } |
| 94 #else | 96 #else |
| 95 // SSE2 versions | 97 // SSE2 versions |
| 96 | 98 |
| 97 // Note: In the next three comparisons a group of 4 pixels is converted to a group of | 99 // Note: In the next three comparisons a group of 4 pixels is converted to a group of |
| 98 // "signed" pixels because SSE2 does not have an unsigned comparison. | 100 // "signed" pixels because SSE2 does not have an unsigned comparison. |
| 99 // Make it so that we can use the signed comparison operators by biasing | 101 // Make it so that we can use the signed comparison operators by biasing |
| 100 // 0x00xxxxxx to 0x80xxxxxx, which are the smallest values, and biasing 0xffxxxxxx to | 102 // 0x00xxxxxx to 0x80xxxxxx, which are the smallest values, and biasing 0xffxxxxxx to |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 152 do { | 154 do { |
| 153 srcover_srgb_srgb_4(dst, dst + delta); | 155 srcover_srgb_srgb_4(dst, dst + delta); |
| 154 dst += 4; | 156 dst += 4; |
| 155 } while (dst < end && check_partial_alphas(pixels = load
(dst + delta))); | 157 } while (dst < end && check_partial_alphas(pixels = load
(dst + delta))); |
| 156 src += dst - start; | 158 src += dst - start; |
| 157 } | 159 } |
| 158 } while (dst < end); | 160 } while (dst < end); |
| 159 | 161 |
| 160 count = count & 3; | 162 count = count & 3; |
| 161 while (count-- > 0) { | 163 while (count-- > 0) { |
| 162 srcover_srgb8888_srgb_1(dst++, *src++); | 164 srcover_srgb_srgb_1(dst++, *src++); |
| 163 } | 165 } |
| 164 } | 166 } |
| 165 } | 167 } |
| 166 #endif | 168 #endif |
| 167 #else | 169 #else |
| 168 | 170 |
| 169 static void srcover_srgb_srgb( | 171 static void srcover_srgb_srgb( |
| 170 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | 172 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
| 171 while (ndst > 0) { | 173 while (ndst > 0) { |
| 172 int n = SkTMin(ndst, nsrc); | 174 int n = SkTMin(ndst, nsrc); |
| 173 | 175 |
| 174 for (int i = 0; i < n; i++) { | 176 for (int i = 0; i < n; i++) { |
| 175 srcover_srgb8888_srgb_1(dst++, src[i]); | 177 srcover_srgb_srgb_1(dst++, src[i]); |
| 176 } | 178 } |
| 177 ndst -= n; | 179 ndst -= n; |
| 178 } | 180 } |
| 179 } | 181 } |
| 180 | 182 |
| 181 #endif | 183 #endif |
| 182 | 184 |
| 183 } // namespace SK_OPTS_NS | 185 } // namespace SK_OPTS_NS |
| 184 | 186 |
| 185 #endif//SkBlend_opts_DEFINED | 187 #endif//SkBlend_opts_DEFINED |
| OLD | NEW |