OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkColorXform_opts_DEFINED | 8 #ifndef SkColorXform_opts_DEFINED |
9 #define SkColorXform_opts_DEFINED | 9 #define SkColorXform_opts_DEFINED |
10 | 10 |
11 #include "SkNx.h" | 11 #include "SkNx.h" |
12 #include "SkColorPriv.h" | 12 #include "SkColorPriv.h" |
13 #include "SkHalf.h" | |
13 #include "SkSRGB.h" | 14 #include "SkSRGB.h" |
15 #include "SkTemplates.h" | |
14 | 16 |
15 namespace SK_OPTS_NS { | 17 namespace SK_OPTS_NS { |
16 | 18 |
17 static Sk4f linear_to_2dot2(const Sk4f& x) { | 19 static Sk4f linear_to_2dot2(const Sk4f& x) { |
18 // x^(29/64) is a very good approximation of the true value, x^(1/2.2). | 20 // x^(29/64) is a very good approximation of the true value, x^(1/2.2). |
19 auto x2 = x.rsqrt(), // x^(-1/2) | 21 auto x2 = x.rsqrt(), // x^(-1/2) |
20 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) | 22 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) |
21 x64 = x32.rsqrt(); // x^(+1/64) | 23 x64 = x32.rsqrt(); // x^(+1/64) |
22 | 24 |
23 // 29 = 32 - 2 - 1 | 25 // 29 = 32 - 2 - 1 |
24 return 255.0f * x2.invert() * x32 * x64.invert(); | 26 return 255.0f * x2.invert() * x32 * x64.invert(); |
25 } | 27 } |
26 | 28 |
27 static Sk4f clamp_0_to_255(const Sk4f& x) { | 29 static Sk4f clamp_0_to_255(const Sk4f& x) { |
28 // The order of the arguments is important here. We want to make sure that NaN | 30 // The order of the arguments is important here. We want to make sure that NaN |
29 // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN. | 31 // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN. |
30 return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f); | 32 return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f); |
31 } | 33 } |
32 | 34 |
33 enum DstGamma { | 35 enum DstGamma { |
36 // 8888 | |
34 kSRGB_DstGamma, | 37 kSRGB_DstGamma, |
35 k2Dot2_DstGamma, | 38 k2Dot2_DstGamma, |
36 kTable_DstGamma, | 39 kTable_DstGamma, |
40 | |
41 // F16 | |
42 kLinear_DstGamma, | |
37 }; | 43 }; |
38 | 44 |
39 template <DstGamma kDstGamma> | 45 template <DstGamma kDstGamma> |
40 static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, | 46 static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
41 const float* const srcTables[3], const float matrix[16], | 47 const float* const srcTables[3], const float matrix[16], |
42 const uint8_t* const dstTables[3]) { | 48 const uint8_t* const dstTables[3]) { |
43 Sk4f rXgXbX = Sk4f::Load(matrix + 0), | 49 Sk4f rXgXbX = Sk4f::Load(matrix + 0), |
44 rYgYbY = Sk4f::Load(matrix + 4), | 50 rYgYbY = Sk4f::Load(matrix + 4), |
45 rZgZbZ = Sk4f::Load(matrix + 8); | 51 rZgZbZ = Sk4f::Load(matrix + 8); |
46 | 52 |
47 if (len >= 4) { | 53 if (len >= 4) { |
48 Sk4f reds, greens, blues; | 54 Sk4f reds, greens, blues; |
49 auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] { | 55 auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] { |
50 reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF], | 56 reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF], |
(...skipping 26 matching lines...) Expand all Loading... | |
77 (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2; | 83 (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2; |
78 | 84 |
79 dstReds = linear_to_curve(dstReds); | 85 dstReds = linear_to_curve(dstReds); |
80 dstGreens = linear_to_curve(dstGreens); | 86 dstGreens = linear_to_curve(dstGreens); |
81 dstBlues = linear_to_curve(dstBlues); | 87 dstBlues = linear_to_curve(dstBlues); |
82 | 88 |
83 dstReds = clamp_0_to_255(dstReds); | 89 dstReds = clamp_0_to_255(dstReds); |
84 dstGreens = clamp_0_to_255(dstGreens); | 90 dstGreens = clamp_0_to_255(dstGreens); |
85 dstBlues = clamp_0_to_255(dstBlues); | 91 dstBlues = clamp_0_to_255(dstBlues); |
86 | 92 |
93 #ifdef SK_PMCOLOR_IS_RGBA | |
87 auto rgba = (Sk4f_round(dstReds) ) | 94 auto rgba = (Sk4f_round(dstReds) ) |
mtklein
2016/07/15 16:45:41
Might feel better to write this sort of stuff usin
msarett
2016/07/15 17:18:23
Yes way better!
| |
88 | (Sk4f_round(dstGreens) << 8) | 95 | (Sk4f_round(dstGreens) << 8) |
89 | (Sk4f_round(dstBlues) << 16) | 96 | (Sk4f_round(dstBlues) << 16) |
90 | (Sk4i{ 0xFF << 24}); | 97 | (Sk4i{ 0xFF << 24}); |
91 rgba.store(dst); | 98 #else |
92 } else { | 99 auto rgba = (Sk4f_round(dstBlues) ) |
100 | (Sk4f_round(dstGreens) << 8) | |
101 | (Sk4f_round(dstReds) << 16) | |
102 | (Sk4i{ 0xFF << 24}); | |
103 #endif | |
104 rgba.store((uint32_t*) dst); | |
105 | |
106 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); | |
107 } else if (kTable_DstGamma == kDstGamma) { | |
93 Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0f), 1023.0f); | 108 Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0f), 1023.0f); |
94 Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0f), 1023.0f); | 109 Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0f), 1023.0f); |
95 Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0f), 1023.0f); | 110 Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0f), 1023.0f); |
96 | 111 |
97 Sk4i indicesReds = Sk4f_round(scaledReds); | 112 #ifdef SK_PMCOLOR_IS_RGBA |
98 Sk4i indicesGreens = Sk4f_round(scaledGreens); | 113 Sk4i indicesReds = Sk4f_round(scaledReds + 0.5f); |
99 Sk4i indicesBlues = Sk4f_round(scaledBlues); | 114 Sk4i indicesGreens = Sk4f_round(scaledGreens + 0.5f); |
115 Sk4i indicesBlues = Sk4f_round(scaledBlues + 0.5f); | |
116 #else | |
117 Sk4i indicesReds = Sk4f_round(scaledBlues + 0.5f); | |
118 Sk4i indicesGreens = Sk4f_round(scaledGreens + 0.5f); | |
119 Sk4i indicesBlues = Sk4f_round(scaledReds + 0.5f); | |
120 #endif | |
100 | 121 |
101 dst[0] = dstTables[0][indicesReds [0]] | 122 uint32_t* dst32 = (uint32_t*) dst; |
102 | dstTables[1][indicesGreens[0]] << 8 | 123 dst32[0] = dstTables[0][indicesReds [0]] |
103 | dstTables[2][indicesBlues [0]] << 16 | 124 | dstTables[1][indicesGreens[0]] << 8 |
104 | 0xFF << 24; | 125 | dstTables[2][indicesBlues [0]] << 16 |
105 dst[1] = dstTables[0][indicesReds [1]] | 126 | 0xFF << 24; |
106 | dstTables[1][indicesGreens[1]] << 8 | 127 dst32[1] = dstTables[0][indicesReds [1]] |
107 | dstTables[2][indicesBlues [1]] << 16 | 128 | dstTables[1][indicesGreens[1]] << 8 |
108 | 0xFF << 24; | 129 | dstTables[2][indicesBlues [1]] << 16 |
109 dst[2] = dstTables[0][indicesReds [2]] | 130 | 0xFF << 24; |
110 | dstTables[1][indicesGreens[2]] << 8 | 131 dst32[2] = dstTables[0][indicesReds [2]] |
111 | dstTables[2][indicesBlues [2]] << 16 | 132 | dstTables[1][indicesGreens[2]] << 8 |
112 | 0xFF << 24; | 133 | dstTables[2][indicesBlues [2]] << 16 |
113 dst[3] = dstTables[0][indicesReds [3]] | 134 | 0xFF << 24; |
114 | dstTables[1][indicesGreens[3]] << 8 | 135 dst32[3] = dstTables[0][indicesReds [3]] |
115 | dstTables[2][indicesBlues [3]] << 16 | 136 | dstTables[1][indicesGreens[3]] << 8 |
116 | 0xFF << 24; | 137 | dstTables[2][indicesBlues [3]] << 16 |
138 | 0xFF << 24; | |
139 | |
140 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); | |
141 } else { | |
142 // FIXME (msarett): | |
143 // Can we do better here? Should we store half floats as planar? | |
144 // Should we write Intel/Arm specific code? Should we add a transpose |
mtklein
2016/07/15 16:45:41
I think,
Very likely,
No (Or Maybe, but let's mak
msarett
2016/07/15 17:18:23
Haha thanks.
| |
145 // function to SkNx? Should we rewrite the algorithm to be interleaved? | |
146 uint64_t* dst64 = (uint64_t*) dst; | |
147 dst64[0] = SkFloatToHalf_finite(Sk4f(dstReds[0], dstGreens[0], dstBlues[0], 1.0f)); | |
148 dst64[1] = SkFloatToHalf_finite(Sk4f(dstReds[1], dstGreens[1], dstBlues[1], 1.0f)); | |
149 dst64[2] = SkFloatToHalf_finite(Sk4f(dstReds[2], dstGreens[2], dstBlues[2], 1.0f)); | |
150 dst64[3] = SkFloatToHalf_finite(Sk4f(dstReds[3], dstGreens[3], dstBlues[3], 1.0f)); | |
151 | |
152 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); | |
117 } | 153 } |
118 | |
119 dst += 4; | |
120 }; | 154 }; |
121 | 155 |
122 load_next_4(); | 156 load_next_4(); |
123 | 157 |
124 while (len >= 4) { | 158 while (len >= 4) { |
125 transform_4(); | 159 transform_4(); |
126 load_next_4(); | 160 load_next_4(); |
127 store_4(); | 161 store_4(); |
128 } | 162 } |
129 | 163 |
130 transform_4(); | 164 transform_4(); |
131 store_4(); | 165 store_4(); |
132 } | 166 } |
133 | 167 |
134 while (len > 0) { | 168 while (len > 0) { |
135 // Splat r,g,b across a register each. | 169 // Splat r,g,b across a register each. |
136 auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]}, | 170 auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]}, |
137 g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]}, | 171 g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]}, |
138 b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}; | 172 b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}; |
139 | 173 |
140 // Apply transformation matrix to dst gamut. | 174 if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { |
141 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; | 175 #ifdef SK_PMCOLOR_IS_RGBA |
msarett
2016/07/15 17:18:23
This is nonsense. Never was going to work...
| |
176 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; | |
177 #else | |
178 auto dstPixel = rXgXbX*b + rYgYbY*g + rZgZbZ*r; | |
179 #endif | |
142 | 180 |
143 if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { | |
144 Sk4f (*linear_to_curve)(const Sk4f&) = | 181 Sk4f (*linear_to_curve)(const Sk4f&) = |
145 (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2; | 182 (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2; |
146 | 183 |
147 dstPixel = linear_to_curve(dstPixel); | 184 dstPixel = linear_to_curve(dstPixel); |
148 | 185 |
149 dstPixel = clamp_0_to_255(dstPixel); | 186 dstPixel = clamp_0_to_255(dstPixel); |
150 | 187 |
151 uint32_t rgba; | 188 uint32_t rgba; |
152 SkNx_cast<uint8_t>(Sk4f_round(dstPixel)).store(&rgba); | 189 SkNx_cast<uint8_t>(Sk4f_round(dstPixel)).store(&rgba); |
153 rgba |= 0xFF000000; | 190 rgba |= 0xFF000000; |
154 *dst = rgba; | 191 *((uint32_t*) dst) = rgba; |
155 } else { | 192 dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); |
193 } else if (kTable_DstGamma == kDstGamma) { | |
194 #ifdef SK_PMCOLOR_IS_RGBA | |
195 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; | |
196 #else | |
197 auto dstPixel = rXgXbX*b + rYgYbY*g + rZgZbZ*r; | |
198 #endif | |
199 | |
156 Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f); | 200 Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f); |
157 | 201 |
158 Sk4i indices = Sk4f_round(scaledPixel); | 202 Sk4i indices = Sk4f_round(scaledPixel); |
159 | 203 |
160 *dst = dstTables[0][indices[0]] | 204 *((uint32_t*) dst) = dstTables[0][indices[0]] |
161 | dstTables[1][indices[1]] << 8 | 205 | dstTables[1][indices[1]] << 8 |
162 | dstTables[2][indices[2]] << 16 | 206 | dstTables[2][indices[2]] << 16 |
163 | 0xFF << 24; | 207 | 0xFF << 24; |
208 | |
209 dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); | |
210 } else { | |
211 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; | |
212 | |
213 uint64_t rgba = SkFloatToHalf_finite(dstPixel); | |
214 | |
215 // Set alpha to 1.0 | |
216 rgba |= 0x3C00000000000000; | |
217 *((uint64_t*) dst) = rgba; | |
218 dst = SkTAddOffset<void>(dst, sizeof(uint64_t)); | |
164 } | 219 } |
165 | 220 |
166 dst += 1; | |
167 src += 1; | 221 src += 1; |
168 len -= 1; | 222 len -= 1; |
169 } | 223 } |
170 } | 224 } |
171 | 225 |
172 static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len, | 226 static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len, |
173 const float* const srcTables[3], const float matrix[16]) { | 227 const float* const srcTables[3], const float matrix[12]) { |
174 color_xform_RGB1<k2Dot2_DstGamma>(dst, src, len, srcTables, matrix, nullptr); | 228 color_xform_RGB1<k2Dot2_DstGamma>(dst, src, len, srcTables, matrix, nullptr); |
175 } | 229 } |
176 | 230 |
177 static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len, | 231 static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len, |
178 const float* const srcTables[3], const float matrix[16]) { | 232 const float* const srcTables[3], const float matrix[12]) { |
179 color_xform_RGB1<kSRGB_DstGamma>(dst, src, len, srcTables, matrix, nullptr); | 233 color_xform_RGB1<kSRGB_DstGamma>(dst, src, len, srcTables, matrix, nullptr); |
180 } | 234 } |
181 | 235 |
182 static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len, | 236 static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len, |
183 const float* const srcTables[3], const float matrix[16], | 237 const float* const srcTables[3], const float matrix[12], |
184 const uint8_t* const dstTables[3]) { | 238 const uint8_t* const dstTables[3]) { |
185 color_xform_RGB1<kTable_DstGamma>(dst, src, len, srcTables, matrix, dstTables); | 239 color_xform_RGB1<kTable_DstGamma>(dst, src, len, srcTables, matrix, dstTables); |
186 } | 240 } |
187 | 241 |
242 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, | |
243 const float* const srcTables[3], const float matrix[12]) { | |
244 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr); | |
245 } | |
246 | |
188 } // namespace SK_OPTS_NS | 247 } // namespace SK_OPTS_NS |
189 | 248 |
190 #endif // SkColorXform_opts_DEFINED | 249 #endif // SkColorXform_opts_DEFINED |
OLD | NEW |