OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkColorXform_opts_DEFINED | 8 #ifndef SkColorXform_opts_DEFINED |
9 #define SkColorXform_opts_DEFINED | 9 #define SkColorXform_opts_DEFINED |
10 | 10 |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
119 | dstTables[1][indicesGreens[2]] << SK_G32_SHIFT | 119 | dstTables[1][indicesGreens[2]] << SK_G32_SHIFT |
120 | dstTables[2][indicesBlues [2]] << SK_B32_SHIFT | 120 | dstTables[2][indicesBlues [2]] << SK_B32_SHIFT |
121 | 0xFF << SK_A32_SHIFT; | 121 | 0xFF << SK_A32_SHIFT; |
122 dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT | 122 dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT |
123 | dstTables[1][indicesGreens[3]] << SK_G32_SHIFT | 123 | dstTables[1][indicesGreens[3]] << SK_G32_SHIFT |
124 | dstTables[2][indicesBlues [3]] << SK_B32_SHIFT | 124 | dstTables[2][indicesBlues [3]] << SK_B32_SHIFT |
125 | 0xFF << SK_A32_SHIFT; | 125 | 0xFF << SK_A32_SHIFT; |
126 | 126 |
127 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); | 127 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); |
128 } else { | 128 } else { |
129 // FIXME (msarett): | 129 Sk4h halfReds = SkFloatToVectorHalf_finite(dstReds); |
130 // Can we do better here? Should we store half floats as planar? | 130 Sk4h halfGreens = SkFloatToVectorHalf_finite(dstGreens); |
131 // Should we write Intel/Arm specific code? Should we add a transpose | 131 Sk4h halfBlues = SkFloatToVectorHalf_finite(dstBlues); |
132 // function to SkNx? Should we rewrite the algorithm to be interleaved? | 132 Sk4h halfAlphas = Sk4h(0x3C00); |
133 | |
134 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
msarett
2016/07/18 22:09:09
Would be interested in adding this to SkNx?
mtklein
2016/07/19 12:45:45
Sure! We've got a couple little one-off functions
msarett
2016/07/19 15:24:49
Done.
| |
135 __m128i rg = _mm_unpacklo_epi16(halfReds.fVec, halfGreens.fVec); | |
136 __m128i ba = _mm_unpacklo_epi16(halfBlues.fVec, halfAlphas.fVec); | |
137 __m128i rgba0 = _mm_unpacklo_epi32(rg, ba); | |
138 __m128i rgba1 = _mm_unpackhi_epi32(rg, ba); | |
139 _mm_storeu_si128(((__m128i*) dst) + 0, rgba0); | |
140 _mm_storeu_si128(((__m128i*) dst) + 1, rgba1); | |
141 #elif defined(SK_ARM_HAS_NEON) | |
142 uint16x4x2_t rg = vzip_u16(halfReds.fVec, halfGreens.fVec); | |
143 uint16x4x2_t ba = vzip_u16(halfBlues.fVec, halfAlphas.fVec); | |
144 uint32x4_t rg32 = vreinterpretq_u32_u16(vcombine_u16(rg.val[0], rg.val[1])); | |
145 uint32x4_t ba32 = vreinterpretq_u32_u16(vcombine_u16(ba.val[0], ba.val[1])); | |
146 uint32x4x2_t rgba = vzipq_u32(rg32, ba32); | |
147 vst1q_u32((uint32_t*) dst, rgba.val[0]); | |
148 vst1q_u32((uint32_t*) dst, rgba.val[1]); | |
149 #else | |
150 Sk4h rgba0 = Sk4h(halfReds[0], halfGreens[0], halfBlues[0], halfAlphas[0]); | |
151 Sk4h rgba1 = Sk4h(halfReds[1], halfGreens[1], halfBlues[1], halfAlphas[1]); | |
152 Sk4h rgba2 = Sk4h(halfReds[2], halfGreens[2], halfBlues[2], halfAlphas[2]); | |
153 Sk4h rgba3 = Sk4h(halfReds[3], halfGreens[3], halfBlues[3], halfAlphas[3]); | |
133 uint64_t* dst64 = (uint64_t*) dst; | 154 uint64_t* dst64 = (uint64_t*) dst; |
134 dst64[0] = SkFloatToHalf_finite(Sk4f(dstReds[0], dstGreens[0], dstBlues[0], 1.0f)); | 155 rgba0.store(dst64 + 0); |
135 dst64[1] = SkFloatToHalf_finite(Sk4f(dstReds[1], dstGreens[1], dstBlues[1], 1.0f)); | 156 rgba1.store(dst64 + 1); |
136 dst64[2] = SkFloatToHalf_finite(Sk4f(dstReds[2], dstGreens[2], dstBlues[2], 1.0f)); | 157 rgba2.store(dst64 + 2); |
137 dst64[3] = SkFloatToHalf_finite(Sk4f(dstReds[3], dstGreens[3], dstBlues[3], 1.0f)); | 158 rgba3.store(dst64 + 3); |
138 | 159 #endif |
139 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); | 160 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); |
140 } | 161 } |
141 }; | 162 }; |
142 | 163 |
143 load_next_4(); | 164 load_next_4(); |
144 | 165 |
145 while (len >= 4) { | 166 while (len >= 4) { |
146 transform_4(); | 167 transform_4(); |
147 load_next_4(); | 168 load_next_4(); |
148 store_4(); | 169 store_4(); |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
215 } | 236 } |
216 | 237 |
217 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, | 238 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, |
218 const float* const srcTables[3], const float matrix[12]) { | 239 const float* const srcTables[3], const float matrix[12]) { |
219 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr); | 240 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr); |
220 } | 241 } |
221 | 242 |
222 } // namespace SK_OPTS_NS | 243 } // namespace SK_OPTS_NS |
223 | 244 |
224 #endif // SkColorXform_opts_DEFINED | 245 #endif // SkColorXform_opts_DEFINED |
OLD | NEW |