Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Unified Diff: src/opts/SkColorXform_opts.h

Issue 2159993003: Improve naive SkColorXform to half floats (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« src/core/SkHalf.h ('K') | « src/core/SkHalf.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/SkColorXform_opts.h
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
index e7a2b4594682176740f15afa6a123760109f9d25..251c3a8e8cb28e015ad1348b4fb004e7d6d6cd7f 100644
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -126,16 +126,37 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
} else {
- // FIXME (msarett):
- // Can we do better here? Should we store half floats as planar?
- // Should we write Intel/Arm specific code? Should we add a transpose
- // function to SkNx? Should we rewrite the algorithm to be interleaved?
+ Sk4h halfReds = SkFloatToVectorHalf_finite(dstReds);
+ Sk4h halfGreens = SkFloatToVectorHalf_finite(dstGreens);
+ Sk4h halfBlues = SkFloatToVectorHalf_finite(dstBlues);
+ Sk4h halfAlphas = Sk4h(0x3C00);
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
msarett 2016/07/18 22:09:09 Would you be interested in adding this to SkNx?
mtklein 2016/07/19 12:45:45 Sure! We've got a couple little one-off functions
msarett 2016/07/19 15:24:49 Done.
+ __m128i rg = _mm_unpacklo_epi16(halfReds.fVec, halfGreens.fVec);
+ __m128i ba = _mm_unpacklo_epi16(halfBlues.fVec, halfAlphas.fVec);
+ __m128i rgba0 = _mm_unpacklo_epi32(rg, ba);
+ __m128i rgba1 = _mm_unpackhi_epi32(rg, ba);
+ _mm_storeu_si128(((__m128i*) dst) + 0, rgba0);
+ _mm_storeu_si128(((__m128i*) dst) + 1, rgba1);
+#elif defined(SK_ARM_HAS_NEON)
+ uint16x4x2_t rg = vzip_u16(halfReds.fVec, halfGreens.fVec);
+ uint16x4x2_t ba = vzip_u16(halfBlues.fVec, halfAlphas.fVec);
+ uint32x4_t rg32 = vreinterpretq_u32_u16(vcombine_u16(rg.val[0], rg.val[1]));
+ uint32x4_t ba32 = vreinterpretq_u32_u16(vcombine_u16(ba.val[0], ba.val[1]));
+ uint32x4x2_t rgba = vzipq_u32(rg32, ba32);
+ vst1q_u32((uint32_t*) dst, rgba.val[0]);
+ vst1q_u32((uint32_t*) dst, rgba.val[1]);
+#else
+ Sk4h rgba0 = Sk4h(halfReds[0], halfGreens[0], halfBlues[0], halfAlphas[0]);
+ Sk4h rgba1 = Sk4h(halfReds[1], halfGreens[1], halfBlues[1], halfAlphas[1]);
+ Sk4h rgba2 = Sk4h(halfReds[2], halfGreens[2], halfBlues[2], halfAlphas[2]);
+ Sk4h rgba3 = Sk4h(halfReds[3], halfGreens[3], halfBlues[3], halfAlphas[3]);
uint64_t* dst64 = (uint64_t*) dst;
- dst64[0] = SkFloatToHalf_finite(Sk4f(dstReds[0], dstGreens[0], dstBlues[0], 1.0f));
- dst64[1] = SkFloatToHalf_finite(Sk4f(dstReds[1], dstGreens[1], dstBlues[1], 1.0f));
- dst64[2] = SkFloatToHalf_finite(Sk4f(dstReds[2], dstGreens[2], dstBlues[2], 1.0f));
- dst64[3] = SkFloatToHalf_finite(Sk4f(dstReds[3], dstGreens[3], dstBlues[3], 1.0f));
-
+ rgba0.store(dst64 + 0);
+ rgba1.store(dst64 + 1);
+ rgba2.store(dst64 + 2);
+ rgba3.store(dst64 + 3);
+#endif
dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t));
}
};
« src/core/SkHalf.h ('K') | « src/core/SkHalf.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698