Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(78)

Side by Side Diff: src/opts/SkColorXform_opts.h

Issue 2081933005: Do loads and math in parallel in SkColorXform_opts (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Use lambda fns to share code Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkColorXform_opts_DEFINED 8 #ifndef SkColorXform_opts_DEFINED
9 #define SkColorXform_opts_DEFINED 9 #define SkColorXform_opts_DEFINED
10 10
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 179
180 static Sk4f clamp_0_to_255(const Sk4f& x) { 180 static Sk4f clamp_0_to_255(const Sk4f& x) {
181 // The order of the arguments is important here. We want to make sure that NaN 181 // The order of the arguments is important here. We want to make sure that NaN
182 // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN. 182 // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN.
183 return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f); 183 return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f);
184 } 184 }
185 185
186 template <const float (&linear_from_curve)[256], Sk4f (*linear_to_curve)(const Sk4f&)> 186 template <const float (&linear_from_curve)[256], Sk4f (*linear_to_curve)(const Sk4f&)>
187 static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, 187 static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
188 const float matrix[16]) { 188 const float matrix[16]) {
189 // Load transformation matrix. 189
190 auto rXgXbX = Sk4f::Load(matrix + 0), 190 Sk4f reds, greens, blues;
191 auto load_next_4 = [&reds, &greens, &blues, &src, &len] {
192 reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF],
193 linear_from_curve[(src[1] >> 0) & 0xFF],
194 linear_from_curve[(src[2] >> 0) & 0xFF],
195 linear_from_curve[(src[3] >> 0) & 0xFF]};
196 greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF],
197 linear_from_curve[(src[1] >> 8) & 0xFF],
198 linear_from_curve[(src[2] >> 8) & 0xFF],
199 linear_from_curve[(src[3] >> 8) & 0xFF]};
200 blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF],
201 linear_from_curve[(src[1] >> 16) & 0xFF],
202 linear_from_curve[(src[2] >> 16) & 0xFF],
203 linear_from_curve[(src[3] >> 16) & 0xFF]};
204 src += 4;
205 len -= 4;
206 };
207
208 Sk4f dstReds, dstGreens, dstBlues;
209 Sk4f rXgXbX = Sk4f::Load(matrix + 0),
191 rYgYbY = Sk4f::Load(matrix + 4), 210 rYgYbY = Sk4f::Load(matrix + 4),
192 rZgZbZ = Sk4f::Load(matrix + 8); 211 rZgZbZ = Sk4f::Load(matrix + 8);
212 auto gamut_transform_4 = [&reds, &greens, &blues, &dstReds, &dstGreens, &dstBlues, &rXgXbX,
213 &rYgYbY, &rZgZbZ] {
214 dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues;
215 dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues;
216 dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues;
217 };
193 218
194 while (len >= 4) { 219 auto gamma_transform_4 = [&dstReds, &dstGreens, &dstBlues] {
195 // Convert to linear. The look-up table has perfect accuracy.
196 auto reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF],
197 linear_from_curve[(src[1] >> 0) & 0xFF],
198 linear_from_curve[(src[2] >> 0) & 0xFF],
199 linear_from_curve[(src[3] >> 0) & 0xFF]};
200 auto greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF],
201 linear_from_curve[(src[1] >> 8) & 0xFF],
202 linear_from_curve[(src[2] >> 8) & 0xFF],
203 linear_from_curve[(src[3] >> 8) & 0xFF]};
204 auto blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF],
205 linear_from_curve[(src[1] >> 16) & 0xFF],
206 linear_from_curve[(src[2] >> 16) & 0xFF],
207 linear_from_curve[(src[3] >> 16) & 0xFF]};
208
209 // Apply the transformation matrix to dst gamut.
210 auto dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues,
211 dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues,
212 dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues;
213
214 // Convert to dst gamma.
215 dstReds = linear_to_curve(dstReds); 220 dstReds = linear_to_curve(dstReds);
216 dstGreens = linear_to_curve(dstGreens); 221 dstGreens = linear_to_curve(dstGreens);
217 dstBlues = linear_to_curve(dstBlues); 222 dstBlues = linear_to_curve(dstBlues);
218 223
219 // Clamp floats to byte range.
220 dstReds = clamp_0_to_255(dstReds); 224 dstReds = clamp_0_to_255(dstReds);
221 dstGreens = clamp_0_to_255(dstGreens); 225 dstGreens = clamp_0_to_255(dstGreens);
222 dstBlues = clamp_0_to_255(dstBlues); 226 dstBlues = clamp_0_to_255(dstBlues);
227 };
223 228
224 // Convert to bytes and store to memory. 229 Sk4i rgba;
225 auto rgba = (Sk4i{(int)0xFF000000} ) 230 auto store_4 = [&dstReds, &dstGreens, &dstBlues, &rgba, &dst] {
226 | (SkNx_cast<int>(dstReds) ) 231 rgba = (Sk4i{(int)0xFF000000} )
227 | (SkNx_cast<int>(dstGreens) << 8) 232 | (SkNx_cast<int>(dstReds) )
228 | (SkNx_cast<int>(dstBlues) << 16); 233 | (SkNx_cast<int>(dstGreens) << 8)
234 | (SkNx_cast<int>(dstBlues) << 16);
229 rgba.store(dst); 235 rgba.store(dst);
236 dst += 4;
237 };
230 238
231 dst += 4; 239 if (len >= 4) {
232 src += 4; 240 load_next_4();
233 len -= 4;
234 } 241 }
235 242
243 while (len >= 4) {
244 gamut_transform_4();
245 load_next_4();
246 gamma_transform_4();
mtklein 2016/06/22 19:33:34 Why not fuse gamma_transform_4() into store_4()?
msarett 2016/06/22 19:44:43 Yup, looks good this way.
247 store_4();
248 }
249
250 gamut_transform_4();
251 gamma_transform_4();
252 store_4();
253
236 while (len > 0) { 254 while (len > 0) {
237 // Splat r,g,b across a register each. 255 // Splat r,g,b across a register each.
238 auto r = Sk4f{linear_from_curve[(*src >> 0) & 0xFF]}, 256 auto r = Sk4f{linear_from_curve[(*src >> 0) & 0xFF]},
239 g = Sk4f{linear_from_curve[(*src >> 8) & 0xFF]}, 257 g = Sk4f{linear_from_curve[(*src >> 8) & 0xFF]},
240 b = Sk4f{linear_from_curve[(*src >> 16) & 0xFF]}; 258 b = Sk4f{linear_from_curve[(*src >> 16) & 0xFF]};
241 259
242 // Apply transformation matrix to dst gamut. 260 // Apply transformation matrix to dst gamut.
243 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; 261 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b;
244 262
245 // Convert to dst gamma. 263 // Convert to dst gamma.
(...skipping 30 matching lines...) Expand all
276 } 294 }
277 295
278 static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len, 296 static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len,
279 const float matrix[16]) { 297 const float matrix[16]) {
280 color_xform_RGB1<linear_from_2dot2, linear_to_srgb>(dst, src, len, matrix); 298 color_xform_RGB1<linear_from_2dot2, linear_to_srgb>(dst, src, len, matrix);
281 } 299 }
282 300
283 } // namespace SK_OPTS_NS 301 } // namespace SK_OPTS_NS
284 302
285 #endif // SkColorXform_opts_DEFINED 303 #endif // SkColorXform_opts_DEFINED
OLD NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698