Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkColorXform_opts_DEFINED | 8 #ifndef SkColorXform_opts_DEFINED |
| 9 #define SkColorXform_opts_DEFINED | 9 #define SkColorXform_opts_DEFINED |
| 10 | 10 |
| (...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 119 \| dstTables[1][indicesGreens[2]] << SK_G32_SHIFT | 119 \| dstTables[1][indicesGreens[2]] << SK_G32_SHIFT |
| 120 \| dstTables[2][indicesBlues [2]] << SK_B32_SHIFT | 120 \| dstTables[2][indicesBlues [2]] << SK_B32_SHIFT |
| 121 \| 0xFF << SK_A32_SHIFT; | 121 \| 0xFF << SK_A32_SHIFT; |
| 122 dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT | 122 dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT |
| 123 \| dstTables[1][indicesGreens[3]] << SK_G32_SHIFT | 123 \| dstTables[1][indicesGreens[3]] << SK_G32_SHIFT |
| 124 \| dstTables[2][indicesBlues [3]] << SK_B32_SHIFT | 124 \| dstTables[2][indicesBlues [3]] << SK_B32_SHIFT |
| 125 \| 0xFF << SK_A32_SHIFT; | 125 \| 0xFF << SK_A32_SHIFT; |
| 126 | 126 |
| 127 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); | 127 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); |
| 128 } else { | 128 } else { |
| 129 // FIXME (msarett): | 129 Sk4h halfReds = SkFloatToVectorHalf_finite(dstReds); |
| 130 // Can we do better here? Should we store half floats as planar? | 130 Sk4h halfGreens = SkFloatToVectorHalf_finite(dstGreens); |
| 131 // Should we write Intel/Arm specific code? Should we add a transpose | 131 Sk4h halfBlues = SkFloatToVectorHalf_finite(dstBlues); |
| 132 // function to SkNx? Should we rewrite the algorithm to be interleaved? | 132 Sk4h halfAlphas = Sk4h(0x3C00); |
| 133 | |
| 134 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
|
msarett
2016/07/18 22:09:09
Would be interested in adding this to SkNx?
mtklein
2016/07/19 12:45:45
Sure! We've got a couple little one-off functions
msarett
2016/07/19 15:24:49
Done.
| |
| 135 __m128i rg = _mm_unpacklo_epi16(halfReds.fVec, halfGreens.fVec); | |
| 136 __m128i ba = _mm_unpacklo_epi16(halfBlues.fVec, halfAlphas.fVec); | |
| 137 __m128i rgba0 = _mm_unpacklo_epi32(rg, ba); | |
| 138 __m128i rgba1 = _mm_unpackhi_epi32(rg, ba); | |
| 139 _mm_storeu_si128(((__m128i*) dst) + 0, rgba0); | |
| 140 _mm_storeu_si128(((__m128i*) dst) + 1, rgba1); | |
| 141 #elif defined(SK_ARM_HAS_NEON) | |
| 142 uint16x4x2_t rg = vzip_u16(halfReds.fVec, halfGreens.fVec); | |
| 143 uint16x4x2_t ba = vzip_u16(halfBlues.fVec, halfAlphas.fVec); | |
| 144 uint32x4_t rg32 = vreinterpretq_u32_u16(vcombine_u16(rg.val[0], rg.val[1])); | |
| 145 uint32x4_t ba32 = vreinterpretq_u32_u16(vcombine_u16(ba.val[0], ba.val[1])); | |
| 146 uint32x4x2_t rgba = vzipq_u32(rg32, ba32); | |
| 147 vst1q_u32((uint32_t*) dst, rgba.val[0]); | |
| 148 vst1q_u32((uint32_t*) dst, rgba.val[1]); | |
| 149 #else | |
| 150 Sk4h rgba0 = Sk4h(halfReds[0], halfGreens[0], halfBlues[0], halfAlphas[0]); | |
| 151 Sk4h rgba1 = Sk4h(halfReds[1], halfGreens[1], halfBlues[1], halfAlphas[1]); | |
| 152 Sk4h rgba2 = Sk4h(halfReds[2], halfGreens[2], halfBlues[2], halfAlphas[2]); | |
| 153 Sk4h rgba3 = Sk4h(halfReds[3], halfGreens[3], halfBlues[3], halfAlphas[3]); | |
| 133 uint64_t* dst64 = (uint64_t*) dst; | 154 uint64_t* dst64 = (uint64_t*) dst; |
| 134 dst64[0] = SkFloatToHalf_finite(Sk4f(dstReds[0], dstGreens[0], dstBlues[0], 1.0f)); | 155 rgba0.store(dst64 + 0); |
| 135 dst64[1] = SkFloatToHalf_finite(Sk4f(dstReds[1], dstGreens[1], dstBlues[1], 1.0f)); | 156 rgba1.store(dst64 + 1); |
| 136 dst64[2] = SkFloatToHalf_finite(Sk4f(dstReds[2], dstGreens[2], dstBlues[2], 1.0f)); | 157 rgba2.store(dst64 + 2); |
| 137 dst64[3] = SkFloatToHalf_finite(Sk4f(dstReds[3], dstGreens[3], dstBlues[3], 1.0f)); | 158 rgba3.store(dst64 + 3); |
| 138 | 159 #endif |
| 139 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); | 160 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); |
| 140 } | 161 } |
| 141 }; | 162 }; |
| 142 | 163 |
| 143 load_next_4(); | 164 load_next_4(); |
| 144 | 165 |
| 145 while (len >= 4) { | 166 while (len >= 4) { |
| 146 transform_4(); | 167 transform_4(); |
| 147 load_next_4(); | 168 load_next_4(); |
| 148 store_4(); | 169 store_4(); |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 215 } | 236 } |
| 216 | 237 |
| 217 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, | 238 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, |
| 218 const float* const srcTables[3], const float matrix[12]) { | 239 const float* const srcTables[3], const float matrix[12]) { |
| 219 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr); | 240 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr); |
| 220 } | 241 } |
| 221 | 242 |
| 222 } // namespace SK_OPTS_NS | 243 } // namespace SK_OPTS_NS |
| 223 | 244 |
| 224 #endif // SkColorXform_opts_DEFINED | 245 #endif // SkColorXform_opts_DEFINED |
| OLD | NEW |