Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(472)

Side by Side Diff: src/opts/SkColorXform_opts.h

Issue 2159993003: Improve naive SkColorXform to half floats (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Fix NO_SIMD bot Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/core/SkHalf.h ('K') | « src/core/SkHalf.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkColorXform_opts_DEFINED 8 #ifndef SkColorXform_opts_DEFINED
9 #define SkColorXform_opts_DEFINED 9 #define SkColorXform_opts_DEFINED
10 10
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 | dstTables[1][indicesGreens[2]] << SK_G32_SHIFT 119 | dstTables[1][indicesGreens[2]] << SK_G32_SHIFT
120 | dstTables[2][indicesBlues [2]] << SK_B32_SHIFT 120 | dstTables[2][indicesBlues [2]] << SK_B32_SHIFT
121 | 0xFF << SK_A32_SHIFT; 121 | 0xFF << SK_A32_SHIFT;
122 dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT 122 dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT
123 | dstTables[1][indicesGreens[3]] << SK_G32_SHIFT 123 | dstTables[1][indicesGreens[3]] << SK_G32_SHIFT
124 | dstTables[2][indicesBlues [3]] << SK_B32_SHIFT 124 | dstTables[2][indicesBlues [3]] << SK_B32_SHIFT
125 | 0xFF << SK_A32_SHIFT; 125 | 0xFF << SK_A32_SHIFT;
126 126
127 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); 127 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
128 } else { 128 } else {
129 // FIXME (msarett): 129 Sk4h halfReds = SkFloatToVectorHalf_finite(dstReds);
130 // Can we do better here? Should we store half floats as planar? 130 Sk4h halfGreens = SkFloatToVectorHalf_finite(dstGreens);
131 // Should we write Intel/Arm specific code? Should we add a transpose 131 Sk4h halfBlues = SkFloatToVectorHalf_finite(dstBlues);
132 // function to SkNx? Should we rewrite the algorithm to be interleaved? 132 Sk4h halfAlphas = Sk4h(0x3C00);
mtklein 2016/07/19 12:45:46 I feel like this is going to come up from time to
msarett 2016/07/19 15:24:49 SGTM. Done.
133
mtklein 2016/07/19 12:45:46 // Transpose the half floats back to interlaced RG
msarett 2016/07/19 15:24:49 Added to Nx.
134 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
135 __m128i rg = _mm_unpacklo_epi16(halfReds.fVec, halfGreens.fVec);
136 __m128i ba = _mm_unpacklo_epi16(halfBlues.fVec, halfAlphas.fVec);
137 __m128i rgba0 = _mm_unpacklo_epi32(rg, ba);
mtklein 2016/07/19 12:45:46 Maybe just lo,hi? Seeing rgba0 and rgba1 makes me
msarett 2016/07/19 15:24:49 Done.
138 __m128i rgba1 = _mm_unpackhi_epi32(rg, ba);
139 _mm_storeu_si128(((__m128i*) dst) + 0, rgba0);
140 _mm_storeu_si128(((__m128i*) dst) + 1, rgba1);
141 #elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON)
142 uint16x4x2_t rg = vzip_u16(halfReds.fVec, halfGreens.fVec);
mtklein 2016/07/19 12:45:46 Huh. I was expecting: vst4_u16((uint16_t*)ds
msarett 2016/07/19 15:24:49 Duh, yup that's way better. Think I was stuck in
143 uint16x4x2_t ba = vzip_u16(halfBlues.fVec, halfAlphas.fVec);
144 uint32x4_t rg32 = vreinterpretq_u32_u16(vcombine_u16(rg.val[0], rg.val[1]));
145 uint32x4_t ba32 = vreinterpretq_u32_u16(vcombine_u16(ba.val[0], ba.val[1]));
146 uint32x4x2_t rgba = vzipq_u32(rg32, ba32);
147 vst1q_u32((uint32_t*) dst, rgba.val[0]);
148 vst1q_u32((uint32_t*) dst, rgba.val[1]);
149 #else
150 Sk4h rgba0 = Sk4h(halfReds[0], halfGreens[0], halfBlues[0], halfAlphas[0]);
mtklein 2016/07/19 12:45:46 How bad is it if you just used this strategy, i.e.
msarett 2016/07/19 15:24:49 Added way more performance measures to commit mess
151 Sk4h rgba1 = Sk4h(halfReds[1], halfGreens[1], halfBlues[1], halfAlphas[1]);
152 Sk4h rgba2 = Sk4h(halfReds[2], halfGreens[2], halfBlues[2], halfAlphas[2]);
153 Sk4h rgba3 = Sk4h(halfReds[3], halfGreens[3], halfBlues[3], halfAlphas[3]);
133 uint64_t* dst64 = (uint64_t*) dst; 154 uint64_t* dst64 = (uint64_t*) dst;
134 dst64[0] = SkFloatToHalf_finite(Sk4f(dstReds[0], dstGreens[0], dstBlues[0], 1.0f)); 155 rgba0.store(dst64 + 0);
135 dst64[1] = SkFloatToHalf_finite(Sk4f(dstReds[1], dstGreens[1], dstBlues[1], 1.0f)); 156 rgba1.store(dst64 + 1);
136 dst64[2] = SkFloatToHalf_finite(Sk4f(dstReds[2], dstGreens[2], dstBlues[2], 1.0f)); 157 rgba2.store(dst64 + 2);
137 dst64[3] = SkFloatToHalf_finite(Sk4f(dstReds[3], dstGreens[3], dstBlues[3], 1.0f)); 158 rgba3.store(dst64 + 3);
138 159 #endif
139 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); 160 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t));
140 } 161 }
141 }; 162 };
142 163
143 load_next_4(); 164 load_next_4();
144 165
145 while (len >= 4) { 166 while (len >= 4) {
146 transform_4(); 167 transform_4();
147 load_next_4(); 168 load_next_4();
148 store_4(); 169 store_4();
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
215 } 236 }
216 237
217 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, 238 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len,
218 const float* const srcTables[3], const float matrix[12]) { 239 const float* const srcTables[3], const float matrix[12]) {
219 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr); 240 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr);
220 } 241 }
221 242
222 } // namespace SK_OPTS_NS 243 } // namespace SK_OPTS_NS
223 244
224 #endif // SkColorXform_opts_DEFINED 245 #endif // SkColorXform_opts_DEFINED
OLDNEW
« src/core/SkHalf.h ('K') | « src/core/SkHalf.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698