Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: src/opts/SkColorXform_opts.h

Issue 2081933005: Do loads and math in parallel in SkColorXform_opts (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkColorXform_opts_DEFINED 8 #ifndef SkColorXform_opts_DEFINED
9 #define SkColorXform_opts_DEFINED 9 #define SkColorXform_opts_DEFINED
10 10
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
184 } 184 }
185 185
186 template <const float (&linear_from_curve)[256], Sk4f (*linear_to_curve)(const Sk4f&)> 186 template <const float (&linear_from_curve)[256], Sk4f (*linear_to_curve)(const Sk4f&)>
187 static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, 187 static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
188 const float matrix[16]) { 188 const float matrix[16]) {
189 // Load transformation matrix. 189 // Load transformation matrix.
190 auto rXgXbX = Sk4f::Load(matrix + 0), 190 auto rXgXbX = Sk4f::Load(matrix + 0),
191 rYgYbY = Sk4f::Load(matrix + 4), 191 rYgYbY = Sk4f::Load(matrix + 4),
192 rZgZbZ = Sk4f::Load(matrix + 8); 192 rZgZbZ = Sk4f::Load(matrix + 8);
193 193
194 // Load linear floats. Do this once outside the loop.
195 Sk4f reds, greens, blues;
196 if (len >= 4) {
197 reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF],
mtklein 2016/06/22 14:46:54 Let's make this a lambda to share here and in the
msarett 2016/06/22 18:34:55 Nice! I like how this looks.
198 linear_from_curve[(src[1] >> 0) & 0xFF],
199 linear_from_curve[(src[2] >> 0) & 0xFF],
200 linear_from_curve[(src[3] >> 0) & 0xFF]};
201 greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF],
202 linear_from_curve[(src[1] >> 8) & 0xFF],
203 linear_from_curve[(src[2] >> 8) & 0xFF],
204 linear_from_curve[(src[3] >> 8) & 0xFF]};
205 blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF],
206 linear_from_curve[(src[1] >> 16) & 0xFF],
207 linear_from_curve[(src[2] >> 16) & 0xFF],
208 linear_from_curve[(src[3] >> 16) & 0xFF]};
209
210 src += 4;
211 len -= 4;
212 }
213
194 while (len >= 4) { 214 while (len >= 4) {
195 // Convert to linear. The look-up table has perfect accuracy.
196 auto reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF],
197 linear_from_curve[(src[1] >> 0) & 0xFF],
198 linear_from_curve[(src[2] >> 0) & 0xFF],
199 linear_from_curve[(src[3] >> 0) & 0xFF]};
200 auto greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF],
201 linear_from_curve[(src[1] >> 8) & 0xFF],
202 linear_from_curve[(src[2] >> 8) & 0xFF],
203 linear_from_curve[(src[3] >> 8) & 0xFF]};
204 auto blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF],
205 linear_from_curve[(src[1] >> 16) & 0xFF],
206 linear_from_curve[(src[2] >> 16) & 0xFF],
207 linear_from_curve[(src[3] >> 16) & 0xFF]};
208
209 // Apply the transformation matrix to dst gamut. 215 // Apply the transformation matrix to dst gamut.
210 auto dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues, 216 auto dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues,
211 dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues, 217 dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues,
212 dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues; 218 dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues;
213 219
220 // Load floats for the next iteration. This can happen in parallel with
221 // the math intensive linear_to_curve conversion.
222 reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF],
223 linear_from_curve[(src[1] >> 0) & 0xFF],
224 linear_from_curve[(src[2] >> 0) & 0xFF],
225 linear_from_curve[(src[3] >> 0) & 0xFF]};
226 greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF],
227 linear_from_curve[(src[1] >> 8) & 0xFF],
228 linear_from_curve[(src[2] >> 8) & 0xFF],
229 linear_from_curve[(src[3] >> 8) & 0xFF]};
230 blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF],
231 linear_from_curve[(src[1] >> 16) & 0xFF],
232 linear_from_curve[(src[2] >> 16) & 0xFF],
233 linear_from_curve[(src[3] >> 16) & 0xFF]};
234
214 // Convert to dst gamma. 235 // Convert to dst gamma.
215 dstReds = linear_to_curve(dstReds); 236 dstReds = linear_to_curve(dstReds);
216 dstGreens = linear_to_curve(dstGreens); 237 dstGreens = linear_to_curve(dstGreens);
217 dstBlues = linear_to_curve(dstBlues); 238 dstBlues = linear_to_curve(dstBlues);
218 239
219 // Clamp floats to byte range. 240 // Clamp floats to byte range.
220 dstReds = clamp_0_to_255(dstReds); 241 dstReds = clamp_0_to_255(dstReds);
221 dstGreens = clamp_0_to_255(dstGreens); 242 dstGreens = clamp_0_to_255(dstGreens);
222 dstBlues = clamp_0_to_255(dstBlues); 243 dstBlues = clamp_0_to_255(dstBlues);
223 244
224 // Convert to bytes and store to memory. 245 // Convert to bytes and store to memory.
225 auto rgba = (Sk4i{(int)0xFF000000} ) 246 auto rgba = (Sk4i{(int)0xFF000000} )
226 | (SkNx_cast<int>(dstReds) ) 247 | (SkNx_cast<int>(dstReds) )
227 | (SkNx_cast<int>(dstGreens) << 8) 248 | (SkNx_cast<int>(dstGreens) << 8)
228 | (SkNx_cast<int>(dstBlues) << 16); 249 | (SkNx_cast<int>(dstBlues) << 16);
229 rgba.store(dst); 250 rgba.store(dst);
230 251
231 dst += 4; 252 dst += 4;
232 src += 4; 253 src += 4;
233 len -= 4; 254 len -= 4;
234 } 255 }
235 256
257 // Complete the final set of four pixels.
258 auto dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues,
259 dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues,
260 dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues;
261
262 dstReds = linear_to_curve(dstReds);
263 dstGreens = linear_to_curve(dstGreens);
264 dstBlues = linear_to_curve(dstBlues);
265
266 dstReds = clamp_0_to_255(dstReds);
267 dstGreens = clamp_0_to_255(dstGreens);
268 dstBlues = clamp_0_to_255(dstBlues);
269
270 auto rgba = (Sk4i{(int)0xFF000000} )
271 | (SkNx_cast<int>(dstReds) )
272 | (SkNx_cast<int>(dstGreens) << 8)
273 | (SkNx_cast<int>(dstBlues) << 16);
274 rgba.store(dst);
275 dst += 4;
276
236 while (len > 0) { 277 while (len > 0) {
237 // Splat r,g,b across a register each. 278 // Splat r,g,b across a register each.
238 auto r = Sk4f{linear_from_curve[(*src >> 0) & 0xFF]}, 279 auto r = Sk4f{linear_from_curve[(*src >> 0) & 0xFF]},
239 g = Sk4f{linear_from_curve[(*src >> 8) & 0xFF]}, 280 g = Sk4f{linear_from_curve[(*src >> 8) & 0xFF]},
240 b = Sk4f{linear_from_curve[(*src >> 16) & 0xFF]}; 281 b = Sk4f{linear_from_curve[(*src >> 16) & 0xFF]};
241 282
242 // Apply transformation matrix to dst gamut. 283 // Apply transformation matrix to dst gamut.
243 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; 284 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b;
244 285
245 // Convert to dst gamma. 286 // Convert to dst gamma.
(...skipping 30 matching lines...) Expand all
276 } 317 }
277 318
278 static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len, 319 static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len,
279 const float matrix[16]) { 320 const float matrix[16]) {
280 color_xform_RGB1<linear_from_2dot2, linear_to_srgb>(dst, src, len, matrix); 321 color_xform_RGB1<linear_from_2dot2, linear_to_srgb>(dst, src, len, matrix);
281 } 322 }
282 323
283 } // namespace SK_OPTS_NS 324 } // namespace SK_OPTS_NS
284 325
285 #endif // SkColorXform_opts_DEFINED 326 #endif // SkColorXform_opts_DEFINED
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698