Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(228)

Side by Side Diff: third_party/qcms/src/transform.c

Issue 1438093002: Remove //third_party/qcms. (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/qcms/src/qcmstypes.h ('k') | third_party/qcms/src/transform-sse1.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* vim: set ts=8 sw=8 noexpandtab: */
2 // qcms
3 // Copyright (C) 2009 Mozilla Corporation
4 // Copyright (C) 1998-2007 Marti Maria
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining
7 // a copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the Softwar e
11 // is furnished to do so, subject to the following conditions:
12 //
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 #include <stdlib.h>
25 #include <math.h>
26 #include <assert.h>
27 #include <string.h> //memcpy
28 #include "qcmsint.h"
29 #include "chain.h"
30 #include "matrix.h"
31 #include "transform_util.h"
32
33 /* for MSVC, GCC, Intel, and Sun compilers */
34 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64 ) || defined(__x86_64__) || defined(__x86_64)
35 #define X86
36 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
37
38 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
39 // This is just an approximation, I am not handling all the non-linear
40 // aspects of the RGB to XYZ process, and assumming that the gamma correction
41 // has transitive property in the tranformation chain.
42 //
43 // the alghoritm:
44 //
45 // - First I build the absolute conversion matrix using
46 // primaries in XYZ. This matrix is next inverted
47 // - Then I eval the source white point across this matrix
48 // obtaining the coeficients of the transformation
49 // - Then, I apply these coeficients to the original matrix
50 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_C IE_xyYTRIPLE primrs)
51 {
52 struct matrix primaries;
53 struct matrix primaries_invert;
54 struct matrix result;
55 struct vector white_point;
56 struct vector coefs;
57
58 double xn, yn;
59 double xr, yr;
60 double xg, yg;
61 double xb, yb;
62
63 xn = white.x;
64 yn = white.y;
65
66 if (yn == 0.0)
67 return matrix_invalid();
68
69 xr = primrs.red.x;
70 yr = primrs.red.y;
71 xg = primrs.green.x;
72 yg = primrs.green.y;
73 xb = primrs.blue.x;
74 yb = primrs.blue.y;
75
76 primaries.m[0][0] = xr;
77 primaries.m[0][1] = xg;
78 primaries.m[0][2] = xb;
79
80 primaries.m[1][0] = yr;
81 primaries.m[1][1] = yg;
82 primaries.m[1][2] = yb;
83
84 primaries.m[2][0] = 1 - xr - yr;
85 primaries.m[2][1] = 1 - xg - yg;
86 primaries.m[2][2] = 1 - xb - yb;
87 primaries.invalid = false;
88
89 white_point.v[0] = xn/yn;
90 white_point.v[1] = 1.;
91 white_point.v[2] = (1.0-xn-yn)/yn;
92
93 primaries_invert = matrix_invert(primaries);
94
95 coefs = matrix_eval(primaries_invert, white_point);
96
97 result.m[0][0] = coefs.v[0]*xr;
98 result.m[0][1] = coefs.v[1]*xg;
99 result.m[0][2] = coefs.v[2]*xb;
100
101 result.m[1][0] = coefs.v[0]*yr;
102 result.m[1][1] = coefs.v[1]*yg;
103 result.m[1][2] = coefs.v[2]*yb;
104
105 result.m[2][0] = coefs.v[0]*(1.-xr-yr);
106 result.m[2][1] = coefs.v[1]*(1.-xg-yg);
107 result.m[2][2] = coefs.v[2]*(1.-xb-yb);
108 result.invalid = primaries_invert.invalid;
109
110 return result;
111 }
112
113 struct CIE_XYZ {
114 double X;
115 double Y;
116 double Z;
117 };
118
119 /* CIE Illuminant D50 */
120 static const struct CIE_XYZ D50_XYZ = {
121 0.9642,
122 1.0000,
123 0.8249
124 };
125
126 /* from lcms: xyY2XYZ()
127 * corresponds to argyll: icmYxy2XYZ() */
128 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
129 {
130 struct CIE_XYZ dest;
131 dest.X = (source.x / source.y) * source.Y;
132 dest.Y = source.Y;
133 dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
134 return dest;
135 }
136
137 /* from lcms: ComputeChromaticAdaption */
138 // Compute chromatic adaption matrix using chad as cone matrix
139 static struct matrix
140 compute_chromatic_adaption(struct CIE_XYZ source_white_point,
141 struct CIE_XYZ dest_white_point,
142 struct matrix chad)
143 {
144 struct matrix chad_inv;
145 struct vector cone_source_XYZ, cone_source_rgb;
146 struct vector cone_dest_XYZ, cone_dest_rgb;
147 struct matrix cone, tmp;
148
149 tmp = chad;
150 chad_inv = matrix_invert(tmp);
151
152 cone_source_XYZ.v[0] = source_white_point.X;
153 cone_source_XYZ.v[1] = source_white_point.Y;
154 cone_source_XYZ.v[2] = source_white_point.Z;
155
156 cone_dest_XYZ.v[0] = dest_white_point.X;
157 cone_dest_XYZ.v[1] = dest_white_point.Y;
158 cone_dest_XYZ.v[2] = dest_white_point.Z;
159
160 cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
161 cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ);
162
163 cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
164 cone.m[0][1] = 0;
165 cone.m[0][2] = 0;
166 cone.m[1][0] = 0;
167 cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
168 cone.m[1][2] = 0;
169 cone.m[2][0] = 0;
170 cone.m[2][1] = 0;
171 cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
172 cone.invalid = false;
173
174 // Normalize
175 return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
176 }
177
178 /* from lcms: cmsAdaptionMatrix */
179 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant T oIll
180 // Bradford is assumed
181 static struct matrix
182 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumi nation)
183 {
184 #if defined (_MSC_VER)
185 #pragma warning(push)
186 /* Disable double to float truncation warning 4305 */
187 #pragma warning(disable:4305)
188 #endif
189 struct matrix lam_rigg = {{ // Bradford matrix
190 { 0.8951, 0.2664, -0.1614 },
191 { -0.7502, 1.7135, 0.0367 },
192 { 0.0389, -0.0685, 1.0296 }
193 }};
194 #if defined (_MSC_VER)
195 /* Restore warnings */
196 #pragma warning(pop)
197 #endif
198 return compute_chromatic_adaption(source_illumination, target_illuminati on, lam_rigg);
199 }
200
201 /* from lcms: cmsAdaptMatrixToD50 */
202 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_wh ite_pt)
203 {
204 struct CIE_XYZ Dn;
205 struct matrix Bradford;
206
207 if (source_white_pt.y == 0.0)
208 return matrix_invalid();
209
210 Dn = xyY2XYZ(source_white_pt);
211
212 Bradford = adaption_matrix(Dn, D50_XYZ);
213 return matrix_multiply(Bradford, r);
214 }
215
216 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm s_CIE_xyYTRIPLE primaries)
217 {
218 struct matrix colorants;
219 colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
220 colorants = adapt_matrix_to_D50(colorants, white_point);
221
222 if (colorants.invalid)
223 return false;
224
225 /* note: there's a transpose type of operation going on here */
226 profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
227 profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
228 profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
229
230 profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]) ;
231 profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]) ;
232 profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]) ;
233
234 profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
235 profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
236 profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
237
238 return true;
239 }
240
241 #if 0
242 static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
243 {
244 const int r_out = output_format.r;
245 const int b_out = output_format.b;
246
247 int i;
248 float (*mat)[4] = transform->matrix;
249 for (i=0; i<length; i++) {
250 unsigned char device_r = *src++;
251 unsigned char device_g = *src++;
252 unsigned char device_b = *src++;
253
254 float linear_r = transform->input_gamma_table_r[device_r];
255 float linear_g = transform->input_gamma_table_g[device_g];
256 float linear_b = transform->input_gamma_table_b[device_b];
257
258 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + m at[2][0]*linear_b;
259 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + m at[2][1]*linear_b;
260 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + m at[2][2]*linear_b;
261
262 float out_device_r = pow(out_linear_r, transform->out_gamma_r);
263 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
264 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
265
266 dest[r_out] = clamp_u8(out_device_r*255);
267 dest[1] = clamp_u8(out_device_g*255);
268 dest[b_out] = clamp_u8(out_device_b*255);
269 dest += 3;
270 }
271 }
272 #endif
273
274 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
275 {
276 const int r_out = output_format.r;
277 const int b_out = output_format.b;
278
279 unsigned int i;
280 for (i = 0; i < length; i++) {
281 float out_device_r, out_device_g, out_device_b;
282 unsigned char device = *src++;
283
284 float linear = transform->input_gamma_table_gray[device];
285
286 out_device_r = lut_interp_linear(linear, transform->output_gamma _lut_r, transform->output_gamma_lut_r_length);
287 out_device_g = lut_interp_linear(linear, transform->output_gamma _lut_g, transform->output_gamma_lut_g_length);
288 out_device_b = lut_interp_linear(linear, transform->output_gamma _lut_b, transform->output_gamma_lut_b_length);
289
290 dest[r_out] = clamp_u8(out_device_r*255);
291 dest[1] = clamp_u8(out_device_g*255);
292 dest[b_out] = clamp_u8(out_device_b*255);
293 dest += 3;
294 }
295 }
296
297 /* Alpha is not corrected.
298 A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
299 RGB Is?" Tech Memo 17 (December 14, 1998).
300 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
301 */
302
303 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigne d char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
304 {
305 const int r_out = output_format.r;
306 const int b_out = output_format.b;
307
308 unsigned int i;
309 for (i = 0; i < length; i++) {
310 float out_device_r, out_device_g, out_device_b;
311 unsigned char device = *src++;
312 unsigned char alpha = *src++;
313
314 float linear = transform->input_gamma_table_gray[device];
315
316 out_device_r = lut_interp_linear(linear, transform->output_gamma _lut_r, transform->output_gamma_lut_r_length);
317 out_device_g = lut_interp_linear(linear, transform->output_gamma _lut_g, transform->output_gamma_lut_g_length);
318 out_device_b = lut_interp_linear(linear, transform->output_gamma _lut_b, transform->output_gamma_lut_b_length);
319
320 dest[r_out] = clamp_u8(out_device_r*255);
321 dest[1] = clamp_u8(out_device_g*255);
322 dest[b_out] = clamp_u8(out_device_b*255);
323 dest[3] = alpha;
324 dest += 4;
325 }
326 }
327
328
329 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns igned char *src, unsigned char *dest, size_t length, qcms_format_type output_for mat)
330 {
331 const int r_out = output_format.r;
332 const int b_out = output_format.b;
333
334 unsigned int i;
335 for (i = 0; i < length; i++) {
336 unsigned char device = *src++;
337 uint16_t gray;
338
339 float linear = transform->input_gamma_table_gray[device];
340
341 /* we could round here... */
342 gray = linear * PRECACHE_OUTPUT_MAX;
343
344 dest[r_out] = transform->output_table_r->data[gray];
345 dest[1] = transform->output_table_g->data[gray];
346 dest[b_out] = transform->output_table_b->data[gray];
347 dest += 3;
348 }
349 }
350
351
352 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un signed char *src, unsigned char *dest, size_t length, qcms_format_type output_fo rmat)
353 {
354 const int r_out = output_format.r;
355 const int b_out = output_format.b;
356
357 unsigned int i;
358 for (i = 0; i < length; i++) {
359 unsigned char device = *src++;
360 unsigned char alpha = *src++;
361 uint16_t gray;
362
363 float linear = transform->input_gamma_table_gray[device];
364
365 /* we could round here... */
366 gray = linear * PRECACHE_OUTPUT_MAX;
367
368 dest[r_out] = transform->output_table_r->data[gray];
369 dest[1] = transform->output_table_g->data[gray];
370 dest[b_out] = transform->output_table_b->data[gray];
371 dest[3] = alpha;
372 dest += 4;
373 }
374 }
375
376 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_ format)
377 {
378 const int r_out = output_format.r;
379 const int b_out = output_format.b;
380
381 unsigned int i;
382 float (*mat)[4] = transform->matrix;
383 for (i = 0; i < length; i++) {
384 unsigned char device_r = *src++;
385 unsigned char device_g = *src++;
386 unsigned char device_b = *src++;
387 uint16_t r, g, b;
388
389 float linear_r = transform->input_gamma_table_r[device_r];
390 float linear_g = transform->input_gamma_table_g[device_g];
391 float linear_b = transform->input_gamma_table_b[device_b];
392
393 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + m at[2][0]*linear_b;
394 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + m at[2][1]*linear_b;
395 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + m at[2][2]*linear_b;
396
397 out_linear_r = clamp_float(out_linear_r);
398 out_linear_g = clamp_float(out_linear_g);
399 out_linear_b = clamp_float(out_linear_b);
400
401 /* we could round here... */
402 r = out_linear_r * PRECACHE_OUTPUT_MAX;
403 g = out_linear_g * PRECACHE_OUTPUT_MAX;
404 b = out_linear_b * PRECACHE_OUTPUT_MAX;
405
406 dest[r_out] = transform->output_table_r->data[r];
407 dest[1] = transform->output_table_g->data[g];
408 dest[b_out] = transform->output_table_b->data[b];
409 dest += 3;
410 }
411 }
412
413 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output _format)
414 {
415 const int r_out = output_format.r;
416 const int b_out = output_format.b;
417
418 unsigned int i;
419 float (*mat)[4] = transform->matrix;
420 for (i = 0; i < length; i++) {
421 unsigned char device_r = *src++;
422 unsigned char device_g = *src++;
423 unsigned char device_b = *src++;
424 unsigned char alpha = *src++;
425 uint16_t r, g, b;
426
427 float linear_r = transform->input_gamma_table_r[device_r];
428 float linear_g = transform->input_gamma_table_g[device_g];
429 float linear_b = transform->input_gamma_table_b[device_b];
430
431 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + m at[2][0]*linear_b;
432 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + m at[2][1]*linear_b;
433 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + m at[2][2]*linear_b;
434
435 out_linear_r = clamp_float(out_linear_r);
436 out_linear_g = clamp_float(out_linear_g);
437 out_linear_b = clamp_float(out_linear_b);
438
439 /* we could round here... */
440 r = out_linear_r * PRECACHE_OUTPUT_MAX;
441 g = out_linear_g * PRECACHE_OUTPUT_MAX;
442 b = out_linear_b * PRECACHE_OUTPUT_MAX;
443
444 dest[r_out] = transform->output_table_r->data[r];
445 dest[1] = transform->output_table_g->data[g];
446 dest[b_out] = transform->output_table_b->data[b];
447 dest[3] = alpha;
448 dest += 4;
449 }
450 }
451
452 // Not used
453 /*
454 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *s rc, unsigned char *dest, size_t length, qcms_format_type output_format)
455 {
456 const int r_out = output_format.r;
457 const int b_out = output_format.b;
458
459 unsigned int i;
460 int xy_len = 1;
461 int x_len = transform->grid_size;
462 int len = x_len * x_len;
463 float* r_table = transform->r_clut;
464 float* g_table = transform->g_clut;
465 float* b_table = transform->b_clut;
466
467 for (i = 0; i < length; i++) {
468 unsigned char in_r = *src++;
469 unsigned char in_g = *src++;
470 unsigned char in_b = *src++;
471 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = i n_b/255.0f;
472
473 int x = floor(linear_r * (transform->grid_size-1));
474 int y = floor(linear_g * (transform->grid_size-1));
475 int z = floor(linear_b * (transform->grid_size-1));
476 int x_n = ceil(linear_r * (transform->grid_size-1));
477 int y_n = ceil(linear_g * (transform->grid_size-1));
478 int z_n = ceil(linear_b * (transform->grid_size-1));
479 float x_d = linear_r * (transform->grid_size-1) - x;
480 float y_d = linear_g * (transform->grid_size-1) - y;
481 float z_d = linear_b * (transform->grid_size-1) - z;
482
483 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d) ;
484 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
485 float r_y1 = lerp(r_x1, r_x2, y_d);
486 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
487 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_ n), x_d);
488 float r_y2 = lerp(r_x3, r_x4, y_d);
489 float clut_r = lerp(r_y1, r_y2, z_d);
490
491 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d) ;
492 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
493 float g_y1 = lerp(g_x1, g_x2, y_d);
494 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
495 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_ n), x_d);
496 float g_y2 = lerp(g_x3, g_x4, y_d);
497 float clut_g = lerp(g_y1, g_y2, z_d);
498
499 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d) ;
500 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
501 float b_y1 = lerp(b_x1, b_x2, y_d);
502 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
503 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_ n), x_d);
504 float b_y2 = lerp(b_x3, b_x4, y_d);
505 float clut_b = lerp(b_y1, b_y2, z_d);
506
507 dest[r_out] = clamp_u8(clut_r*255.0f);
508 dest[1] = clamp_u8(clut_g*255.0f);
509 dest[b_out] = clamp_u8(clut_b*255.0f);
510 dest += 3;
511 }
512 }
513 */
514
515 // Using lcms' tetra interpolation algorithm.
516 static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsig ned char *src, unsigned char *dest, size_t length, qcms_format_type output_forma t)
517 {
518 const int r_out = output_format.r;
519 const int b_out = output_format.b;
520
521 unsigned int i;
522 int xy_len = 1;
523 int x_len = transform->grid_size;
524 int len = x_len * x_len;
525 float* r_table = transform->r_clut;
526 float* g_table = transform->g_clut;
527 float* b_table = transform->b_clut;
528 float c0_r, c1_r, c2_r, c3_r;
529 float c0_g, c1_g, c2_g, c3_g;
530 float c0_b, c1_b, c2_b, c3_b;
531 float clut_r, clut_g, clut_b;
532 for (i = 0; i < length; i++) {
533 unsigned char in_r = *src++;
534 unsigned char in_g = *src++;
535 unsigned char in_b = *src++;
536 unsigned char in_a = *src++;
537 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = i n_b/255.0f;
538
539 int x = floor(linear_r * (transform->grid_size-1));
540 int y = floor(linear_g * (transform->grid_size-1));
541 int z = floor(linear_b * (transform->grid_size-1));
542 int x_n = ceil(linear_r * (transform->grid_size-1));
543 int y_n = ceil(linear_g * (transform->grid_size-1));
544 int z_n = ceil(linear_b * (transform->grid_size-1));
545 float rx = linear_r * (transform->grid_size-1) - x;
546 float ry = linear_g * (transform->grid_size-1) - y;
547 float rz = linear_b * (transform->grid_size-1) - z;
548
549 c0_r = CLU(r_table, x, y, z);
550 c0_g = CLU(g_table, x, y, z);
551 c0_b = CLU(b_table, x, y, z);
552
553 if( rx >= ry ) {
554 if (ry >= rz) { //rx >= ry && ry >= rz
555 c1_r = CLU(r_table, x_n, y, z) - c0_r;
556 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
557 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);
558 c1_g = CLU(g_table, x_n, y, z) - c0_g;
559 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
560 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);
561 c1_b = CLU(b_table, x_n, y, z) - c0_b;
562 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
563 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);
564 } else {
565 if (rx >= rz) { //rx >= rz && rz >= ry
566 c1_r = CLU(r_table, x_n, y, z) - c0_r;
567 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);
568 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x_n, y, z);
569 c1_g = CLU(g_table, x_n, y, z) - c0_g;
570 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);
571 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x_n, y, z);
572 c1_b = CLU(b_table, x_n, y, z) - c0_b;
573 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);
574 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x_n, y, z);
575 } else { //rz > rx && rx >= ry
576 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x, y, z_n);
577 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);
578 c3_r = CLU(r_table, x, y, z_n) - c0_r;
579 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x, y, z_n);
580 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);
581 c3_g = CLU(g_table, x, y, z_n) - c0_g;
582 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x, y, z_n);
583 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);
584 c3_b = CLU(b_table, x, y, z_n) - c0_b;
585 }
586 }
587 } else {
588 if (rx >= rz) { //ry > rx && rx >= rz
589 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
590 c2_r = CLU(r_table, x, y_n, z) - c0_r;
591 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);
592 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
593 c2_g = CLU(g_table, x, y_n, z) - c0_g;
594 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);
595 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
596 c2_b = CLU(b_table, x, y_n, z) - c0_b;
597 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);
598 } else {
599 if (ry >= rz) { //ry >= rz && rz > rx
600 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);
601 c2_r = CLU(r_table, x, y_n, z) - c0_r;
602 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y_n, z);
603 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);
604 c2_g = CLU(g_table, x, y_n, z) - c0_g;
605 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y_n, z);
606 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);
607 c2_b = CLU(b_table, x, y_n, z) - c0_b;
608 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y_n, z);
609 } else { //rz > ry && ry > rx
610 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);
611 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y, z_n);
612 c3_r = CLU(r_table, x, y, z_n) - c0_r;
613 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);
614 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y, z_n);
615 c3_g = CLU(g_table, x, y, z_n) - c0_g;
616 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);
617 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y, z_n);
618 c3_b = CLU(b_table, x, y, z_n) - c0_b;
619 }
620 }
621 }
622
623 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
624 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
625 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
626
627 dest[r_out] = clamp_u8(clut_r*255.0f);
628 dest[1] = clamp_u8(clut_g*255.0f);
629 dest[b_out] = clamp_u8(clut_b*255.0f);
630 dest[3] = in_a;
631 dest += 4;
632 }
633 }
634
635 // Using lcms' tetra interpolation code.
636 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c har *src, unsigned char *dest, size_t length, qcms_format_type output_format)
637 {
638 const int r_out = output_format.r;
639 const int b_out = output_format.b;
640
641 unsigned int i;
642 int xy_len = 1;
643 int x_len = transform->grid_size;
644 int len = x_len * x_len;
645 float* r_table = transform->r_clut;
646 float* g_table = transform->g_clut;
647 float* b_table = transform->b_clut;
648 float c0_r, c1_r, c2_r, c3_r;
649 float c0_g, c1_g, c2_g, c3_g;
650 float c0_b, c1_b, c2_b, c3_b;
651 float clut_r, clut_g, clut_b;
652 for (i = 0; i < length; i++) {
653 unsigned char in_r = *src++;
654 unsigned char in_g = *src++;
655 unsigned char in_b = *src++;
656 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = i n_b/255.0f;
657
658 int x = floor(linear_r * (transform->grid_size-1));
659 int y = floor(linear_g * (transform->grid_size-1));
660 int z = floor(linear_b * (transform->grid_size-1));
661 int x_n = ceil(linear_r * (transform->grid_size-1));
662 int y_n = ceil(linear_g * (transform->grid_size-1));
663 int z_n = ceil(linear_b * (transform->grid_size-1));
664 float rx = linear_r * (transform->grid_size-1) - x;
665 float ry = linear_g * (transform->grid_size-1) - y;
666 float rz = linear_b * (transform->grid_size-1) - z;
667
668 c0_r = CLU(r_table, x, y, z);
669 c0_g = CLU(g_table, x, y, z);
670 c0_b = CLU(b_table, x, y, z);
671
672 if( rx >= ry ) {
673 if (ry >= rz) { //rx >= ry && ry >= rz
674 c1_r = CLU(r_table, x_n, y, z) - c0_r;
675 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
676 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);
677 c1_g = CLU(g_table, x_n, y, z) - c0_g;
678 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
679 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);
680 c1_b = CLU(b_table, x_n, y, z) - c0_b;
681 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
682 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);
683 } else {
684 if (rx >= rz) { //rx >= rz && rz >= ry
685 c1_r = CLU(r_table, x_n, y, z) - c0_r;
686 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);
687 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x_n, y, z);
688 c1_g = CLU(g_table, x_n, y, z) - c0_g;
689 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);
690 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x_n, y, z);
691 c1_b = CLU(b_table, x_n, y, z) - c0_b;
692 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);
693 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x_n, y, z);
694 } else { //rz > rx && rx >= ry
695 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x, y, z_n);
696 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);
697 c3_r = CLU(r_table, x, y, z_n) - c0_r;
698 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x, y, z_n);
699 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);
700 c3_g = CLU(g_table, x, y, z_n) - c0_g;
701 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x, y, z_n);
702 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);
703 c3_b = CLU(b_table, x, y, z_n) - c0_b;
704 }
705 }
706 } else {
707 if (rx >= rz) { //ry > rx && rx >= rz
708 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
709 c2_r = CLU(r_table, x, y_n, z) - c0_r;
710 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);
711 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
712 c2_g = CLU(g_table, x, y_n, z) - c0_g;
713 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);
714 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
715 c2_b = CLU(b_table, x, y_n, z) - c0_b;
716 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);
717 } else {
718 if (ry >= rz) { //ry >= rz && rz > rx
719 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);
720 c2_r = CLU(r_table, x, y_n, z) - c0_r;
721 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y_n, z);
722 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);
723 c2_g = CLU(g_table, x, y_n, z) - c0_g;
724 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y_n, z);
725 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);
726 c2_b = CLU(b_table, x, y_n, z) - c0_b;
727 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y_n, z);
728 } else { //rz > ry && ry > rx
729 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);
730 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y, z_n);
731 c3_r = CLU(r_table, x, y, z_n) - c0_r;
732 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);
733 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y, z_n);
734 c3_g = CLU(g_table, x, y, z_n) - c0_g;
735 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);
736 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y, z_n);
737 c3_b = CLU(b_table, x, y, z_n) - c0_b;
738 }
739 }
740 }
741
742 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
743 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
744 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
745
746 dest[r_out] = clamp_u8(clut_r*255.0f);
747 dest[1] = clamp_u8(clut_g*255.0f);
748 dest[b_out] = clamp_u8(clut_b*255.0f);
749 dest += 3;
750 }
751 }
752
753 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
754 {
755 const int r_out = output_format.r;
756 const int b_out = output_format.b;
757
758 unsigned int i;
759 float (*mat)[4] = transform->matrix;
760 for (i = 0; i < length; i++) {
761 unsigned char device_r = *src++;
762 unsigned char device_g = *src++;
763 unsigned char device_b = *src++;
764 float out_device_r, out_device_g, out_device_b;
765
766 float linear_r = transform->input_gamma_table_r[device_r];
767 float linear_g = transform->input_gamma_table_g[device_g];
768 float linear_b = transform->input_gamma_table_b[device_b];
769
770 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + m at[2][0]*linear_b;
771 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + m at[2][1]*linear_b;
772 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + m at[2][2]*linear_b;
773
774 out_linear_r = clamp_float(out_linear_r);
775 out_linear_g = clamp_float(out_linear_g);
776 out_linear_b = clamp_float(out_linear_b);
777
778 out_device_r = lut_interp_linear(out_linear_r,
779 transform->output_gamma_lut_r, transform->output _gamma_lut_r_length);
780 out_device_g = lut_interp_linear(out_linear_g,
781 transform->output_gamma_lut_g, transform->output _gamma_lut_g_length);
782 out_device_b = lut_interp_linear(out_linear_b,
783 transform->output_gamma_lut_b, transform->output _gamma_lut_b_length);
784
785 dest[r_out] = clamp_u8(out_device_r*255);
786 dest[1] = clamp_u8(out_device_g*255);
787 dest[b_out] = clamp_u8(out_device_b*255);
788 dest += 3;
789 }
790 }
791
792 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
793 {
794 const int r_out = output_format.r;
795 const int b_out = output_format.b;
796
797 unsigned int i;
798 float (*mat)[4] = transform->matrix;
799 for (i = 0; i < length; i++) {
800 unsigned char device_r = *src++;
801 unsigned char device_g = *src++;
802 unsigned char device_b = *src++;
803 unsigned char alpha = *src++;
804 float out_device_r, out_device_g, out_device_b;
805
806 float linear_r = transform->input_gamma_table_r[device_r];
807 float linear_g = transform->input_gamma_table_g[device_g];
808 float linear_b = transform->input_gamma_table_b[device_b];
809
810 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + m at[2][0]*linear_b;
811 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + m at[2][1]*linear_b;
812 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + m at[2][2]*linear_b;
813
814 out_linear_r = clamp_float(out_linear_r);
815 out_linear_g = clamp_float(out_linear_g);
816 out_linear_b = clamp_float(out_linear_b);
817
818 out_device_r = lut_interp_linear(out_linear_r,
819 transform->output_gamma_lut_r, transform->output _gamma_lut_r_length);
820 out_device_g = lut_interp_linear(out_linear_g,
821 transform->output_gamma_lut_g, transform->output _gamma_lut_g_length);
822 out_device_b = lut_interp_linear(out_linear_b,
823 transform->output_gamma_lut_b, transform->output _gamma_lut_b_length);
824
825 dest[r_out] = clamp_u8(out_device_r*255);
826 dest[1] = clamp_u8(out_device_g*255);
827 dest[b_out] = clamp_u8(out_device_b*255);
828 dest[3] = alpha;
829 dest += 4;
830 }
831 }
832
833 #if 0
834 static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsign ed char *src, unsigned char *dest, size_t length, qcms_format_type output_format )
835 {
836 const int r_out = output_format.r;
837 const int b_out = output_format.b;
838
839 int i;
840 float (*mat)[4] = transform->matrix;
841 for (i = 0; i < length; i++) {
842 unsigned char device_r = *src++;
843 unsigned char device_g = *src++;
844 unsigned char device_b = *src++;
845
846 float linear_r = transform->input_gamma_table_r[device_r];
847 float linear_g = transform->input_gamma_table_g[device_g];
848 float linear_b = transform->input_gamma_table_b[device_b];
849
850 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + m at[2][0]*linear_b;
851 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + m at[2][1]*linear_b;
852 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + m at[2][2]*linear_b;
853
854 dest[r_out] = clamp_u8(out_linear_r*255);
855 dest[1] = clamp_u8(out_linear_g*255);
856 dest[b_out] = clamp_u8(out_linear_b*255);
857 dest += 3;
858 }
859 }
860 #endif
861
862 /*
863 * If users create and destroy objects on different threads, even if the same
864 * objects aren't used on different threads at the same time, we can still run
865 * in to trouble with refcounts if they aren't atomic.
866 *
867 * This can lead to us prematurely deleting the precache if threads get unlucky
868 * and write the wrong value to the ref count.
869 */
870 static struct precache_output *precache_reference(struct precache_output *p)
871 {
872 qcms_atomic_increment(p->ref_count);
873 return p;
874 }
875
876 static struct precache_output *precache_create()
877 {
878 struct precache_output *p = malloc(sizeof(struct precache_output));
879 if (p)
880 p->ref_count = 1;
881 return p;
882 }
883
884 void precache_release(struct precache_output *p)
885 {
886 if (qcms_atomic_decrement(p->ref_count) == 0) {
887 free(p);
888 }
889 }
890
891 #ifdef HAVE_POSIX_MEMALIGN
892 static qcms_transform *transform_alloc(void)
893 {
894 qcms_transform *t;
895 if (!posix_memalign(&t, 16, sizeof(*t))) {
896 return t;
897 } else {
898 return NULL;
899 }
900 }
901 static void transform_free(qcms_transform *t)
902 {
903 free(t);
904 }
905 #else
906 static qcms_transform *transform_alloc(void)
907 {
908 /* transform needs to be aligned on a 16byte boundrary */
909 char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 1 6, 1);
910 /* make room for a pointer to the block returned by calloc */
911 void *transform_start = original_block + sizeof(void*);
912 /* align transform_start */
913 qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transf orm_start + 15) & ~0xf);
914
915 /* store a pointer to the block returned by calloc so that we can free i t later */
916 void **(original_block_ptr) = (void**)transform_aligned;
917 if (!original_block)
918 return NULL;
919 original_block_ptr--;
920 *original_block_ptr = original_block;
921
922 return transform_aligned;
923 }
924 static void transform_free(qcms_transform *t)
925 {
926 /* get at the pointer to the unaligned block returned by calloc */
927 void **p = (void**)t;
928 p--;
929 free(*p);
930 }
931 #endif
932
933 void qcms_transform_release(qcms_transform *t)
934 {
935 /* ensure we only free the gamma tables once even if there are
936 * multiple references to the same data */
937
938 if (t->output_table_r)
939 precache_release(t->output_table_r);
940 if (t->output_table_g)
941 precache_release(t->output_table_g);
942 if (t->output_table_b)
943 precache_release(t->output_table_b);
944
945 free(t->input_gamma_table_r);
946 if (t->input_gamma_table_g != t->input_gamma_table_r)
947 free(t->input_gamma_table_g);
948 if (t->input_gamma_table_g != t->input_gamma_table_r &&
949 t->input_gamma_table_g != t->input_gamma_table_b)
950 free(t->input_gamma_table_b);
951
952 free(t->input_gamma_table_gray);
953
954 free(t->output_gamma_lut_r);
955 free(t->output_gamma_lut_g);
956 free(t->output_gamma_lut_b);
957
958 transform_free(t);
959 }
960
961 #ifdef X86
962 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h i n
963 // mozilla/jpeg)
964 // -------------------------------------------------------------------------
965 #if defined(_M_IX86) && defined(_MSC_VER)
966 #define HAS_CPUID
967 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
968 register - I'm not sure if that ever happens on windows, but cpuid isn't
969 on the critical path so we just preserve the register to be safe and to be
970 consistent with the non-windows version. */
971 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
972 uint32_t a_, b_, c_, d_;
973 __asm {
974 xchg ebx, esi
975 mov eax, fxn
976 cpuid
977 mov a_, eax
978 mov b_, ebx
979 mov c_, ecx
980 mov d_, edx
981 xchg ebx, esi
982 }
983 *a = a_;
984 *b = b_;
985 *c = c_;
986 *d = d_;
987 }
988 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || define d(__i386))
989 #define HAS_CPUID
990 /* Get us a CPUID function. We can't use ebx because it's the PIC register on
991 some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
992 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
993
994 uint32_t a_, b_, c_, d_;
995 __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
996 : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
997 *a = a_;
998 *b = b_;
999 *c = c_;
1000 *d = d_;
1001 }
1002 #endif
1003
1004 // -------------------------Runtime SSEx Detection-----------------------------
1005
1006 /* MMX is always supported per
1007 * Gecko v1.9.1 minimum CPU requirements */
1008 #define SSE1_EDX_MASK (1UL << 25)
1009 #define SSE2_EDX_MASK (1UL << 26)
1010 #define SSE3_ECX_MASK (1UL << 0)
1011
1012 static int sse_version_available(void)
1013 {
1014 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
1015 /* we know at build time that 64-bit CPUs always have SSE2
1016 * this tells the compiler that non-SSE2 branches will never be
1017 * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */
1018 return 2;
1019 #elif defined(HAS_CPUID)
1020 static int sse_version = -1;
1021 uint32_t a, b, c, d;
1022 uint32_t function = 0x00000001;
1023
1024 if (sse_version == -1) {
1025 sse_version = 0;
1026 cpuid(function, &a, &b, &c, &d);
1027 if (c & SSE3_ECX_MASK)
1028 sse_version = 3;
1029 else if (d & SSE2_EDX_MASK)
1030 sse_version = 2;
1031 else if (d & SSE1_EDX_MASK)
1032 sse_version = 1;
1033 }
1034
1035 return sse_version;
1036 #else
1037 return 0;
1038 #endif
1039 }
1040 #endif
1041
1042 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
1043 {-0.7502f, 1.7135f, 0.0367f},
1044 { 0.0389f,-0.0685f, 1.0296f}},
1045 false};
1046
1047 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0. 1599627f},
1048 { 0.4323053f, 0.5183603f, 0. 0492912f},
1049 {-0.0085287f, 0.0400428f, 0. 9684867f}},
1050 false};
1051
1052 // See ICCv4 E.3
1053 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
1054 float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1 ][0] + 0.82521f*bradford_matrix.m[2][0]) /
1055 (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] );
1056 float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1 ][1] + 0.82521f*bradford_matrix.m[2][1]) /
1057 (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] );
1058 float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1 ][2] + 0.82521f*bradford_matrix.m[2][2]) /
1059 (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] );
1060 struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
1061 return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adapt ion, bradford_matrix) );
1062 }
1063
1064 void qcms_profile_precache_output_transform(qcms_profile *profile)
1065 {
1066 /* we only support precaching on rgb profiles */
1067 if (profile->color_space != RGB_SIGNATURE)
1068 return;
1069
1070 if (qcms_supports_iccv4) {
1071 /* don't precache since we will use the B2A LUT */
1072 if (profile->B2A0)
1073 return;
1074
1075 /* don't precache since we will use the mBA LUT */
1076 if (profile->mBA)
1077 return;
1078 }
1079
1080 /* don't precache if we do not have the TRC curves */
1081 if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1082 return;
1083
1084 if (!profile->output_table_r) {
1085 profile->output_table_r = precache_create();
1086 if (profile->output_table_r &&
1087 !compute_precache(profile->redTRC, profile->outp ut_table_r->data)) {
1088 precache_release(profile->output_table_r);
1089 profile->output_table_r = NULL;
1090 }
1091 }
1092 if (!profile->output_table_g) {
1093 profile->output_table_g = precache_create();
1094 if (profile->output_table_g &&
1095 !compute_precache(profile->greenTRC, profile->ou tput_table_g->data)) {
1096 precache_release(profile->output_table_g);
1097 profile->output_table_g = NULL;
1098 }
1099 }
1100 if (!profile->output_table_b) {
1101 profile->output_table_b = precache_create();
1102 if (profile->output_table_b &&
1103 !compute_precache(profile->blueTRC, profile->out put_table_b->data)) {
1104 precache_release(profile->output_table_b);
1105 profile->output_table_b = NULL;
1106 }
1107 }
1108 }
1109
1110 /* Replace the current transformation with a LUT transformation using a given nu mber of sample points */
1111 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms _profile *in, qcms_profile *out,
1112 int samples, qcms_data_type in_ type)
1113 {
1114 /* The range between which 2 consecutive sample points can be used to in terpolate */
1115 uint16_t x,y,z;
1116 uint32_t l;
1117 uint32_t lutSize = 3 * samples * samples * samples;
1118 float* src = NULL;
1119 float* dest = NULL;
1120 float* lut = NULL;
1121 float inverse;
1122
1123 src = malloc(lutSize*sizeof(float));
1124 dest = malloc(lutSize*sizeof(float));
1125
1126 if (src && dest) {
1127 /* Prepare a list of points we want to sample: x, y, z order */
1128 l = 0;
1129 inverse = 1 / (float)(samples-1);
1130 for (x = 0; x < samples; x++) {
1131 for (y = 0; y < samples; y++) {
1132 for (z = 0; z < samples; z++) {
1133 src[l++] = x * inverse; // r
1134 src[l++] = y * inverse; // g
1135 src[l++] = z * inverse; // b
1136 }
1137 }
1138 }
1139
1140 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1141
1142 if (lut) {
1143 transform->r_clut = &lut[0]; // r
1144 transform->g_clut = &lut[1]; // g
1145 transform->b_clut = &lut[2]; // b
1146 transform->grid_size = samples;
1147 if (in_type == QCMS_DATA_RGBA_8) {
1148 transform->transform_fn = qcms_transform_data_te tra_clut_rgba;
1149 } else {
1150 transform->transform_fn = qcms_transform_data_te tra_clut;
1151 }
1152 }
1153 }
1154
1155 // XXX: qcms_modular_transform_data may return the lut in either the src or the
1156 // dest buffer. If so, it must not be free-ed.
1157 if (src && lut != src) {
1158 free(src);
1159 }
1160 if (dest && lut != dest) {
1161 free(dest);
1162 }
1163
1164 if (lut == NULL) {
1165 return NULL;
1166 }
1167 return transform;
1168 }
1169
1170 /* Create a transform LUT using the given number of sample points. The transform LUT data is stored
1171 in the output (cube) in bgra format in zyx sample order. */
1172 qcms_bool qcms_transform_create_LUT_zyx_bgra(qcms_profile *in, qcms_profile *out , qcms_intent intent,
1173 int samples, unsigned char* cube)
1174 {
1175 uint16_t z,y,x;
1176 uint32_t l,index;
1177 uint32_t lutSize = 3 * samples * samples * samples;
1178
1179 float* src = NULL;
1180 float* dest = NULL;
1181 float* lut = NULL;
1182 float inverse;
1183
1184 src = malloc(lutSize*sizeof(float));
1185 dest = malloc(lutSize*sizeof(float));
1186
1187 if (src && dest) {
1188 /* Prepare a list of points we want to sample: z, y, x order */
1189 l = 0;
1190 inverse = 1 / (float)(samples-1);
1191 for (z = 0; z < samples; z++) {
1192 for (y = 0; y < samples; y++) {
1193 for (x = 0; x < samples; x++) {
1194 src[l++] = x * inverse; // r
1195 src[l++] = y * inverse; // g
1196 src[l++] = z * inverse; // b
1197 }
1198 }
1199 }
1200
1201 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1202
1203 if (lut) {
1204 index = l = 0;
1205 for (z = 0; z < samples; z++) {
1206 for (y = 0; y < samples; y++) {
1207 for (x = 0; x < samples; x++) {
1208 cube[index++] = (int)floorf(lut[ l + 2] * 255.0f + 0.5f); // b
1209 cube[index++] = (int)floorf(lut[ l + 1] * 255.0f + 0.5f); // g
1210 cube[index++] = (int)floorf(lut[ l + 0] * 255.0f + 0.5f); // r
1211 cube[index++] = 255; // a
1212 l += 3;
1213 }
1214 }
1215 }
1216 }
1217 }
1218
1219 // XXX: qcms_modular_transform_data may return the lut data in either th e src or
1220 // dest buffer so free src, dest, and lut with care.
1221
1222 if (src && lut != src)
1223 free(src);
1224 if (dest && lut != dest)
1225 free(dest);
1226
1227 if (lut) {
1228 free(lut);
1229 return true;
1230 }
1231
1232 return false;
1233 }
1234
1235 #define NO_MEM_TRANSFORM NULL
1236
1237 qcms_transform* qcms_transform_create(
1238 qcms_profile *in, qcms_data_type in_type,
1239 qcms_profile *out, qcms_data_type out_type,
1240 qcms_intent intent)
1241 {
1242 bool precache = false;
1243
1244 qcms_transform *transform = transform_alloc();
1245 if (!transform) {
1246 return NULL;
1247 }
1248 if (out_type != QCMS_DATA_RGB_8 &&
1249 out_type != QCMS_DATA_RGBA_8) {
1250 assert(0 && "output type");
1251 transform_free(transform);
1252 return NULL;
1253 }
1254
1255 if (out->output_table_r &&
1256 out->output_table_g &&
1257 out->output_table_b) {
1258 precache = true;
1259 }
1260
1261 if (qcms_supports_iccv4 && (in->A2B0 || out->B2A0 || in->mAB || out->mAB )) {
1262 // Precache the transformation to a CLUT 33x33x33 in size.
1263 // 33 is used by many profiles and works well in pratice.
1264 // This evenly divides 256 into blocks of 8x8x8.
1265 // TODO For transforming small data sets of about 200x200 or les s
1266 // precaching should be avoided.
1267 qcms_transform *result = qcms_transform_precacheLUT_float(transf orm, in, out, 33, in_type);
1268 if (!result) {
1269 assert(0 && "precacheLUT failed");
1270 transform_free(transform);
1271 return NULL;
1272 }
1273 return result;
1274 }
1275
1276 if (precache) {
1277 transform->output_table_r = precache_reference(out->output_table _r);
1278 transform->output_table_g = precache_reference(out->output_table _g);
1279 transform->output_table_b = precache_reference(out->output_table _b);
1280 } else {
1281 if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
1282 qcms_transform_release(transform);
1283 return NO_MEM_TRANSFORM;
1284 }
1285 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &t ransform->output_gamma_lut_r_length);
1286 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
1287 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, & transform->output_gamma_lut_b_length);
1288 if (!transform->output_gamma_lut_r || !transform->output_gamma_l ut_g || !transform->output_gamma_lut_b) {
1289 qcms_transform_release(transform);
1290 return NO_MEM_TRANSFORM;
1291 }
1292 }
1293
1294 if (in->color_space == RGB_SIGNATURE) {
1295 struct matrix in_matrix, out_matrix, result;
1296
1297 if (in_type != QCMS_DATA_RGB_8 &&
1298 in_type != QCMS_DATA_RGBA_8){
1299 assert(0 && "input type");
1300 transform_free(transform);
1301 return NULL;
1302 }
1303 if (precache) {
1304 #if defined(SSE2_ENABLE) && defined(X86)
1305 if (sse_version_available() >= 2) {
1306 if (in_type == QCMS_DATA_RGB_8)
1307 transform->transform_fn = qcms_transform_dat a_rgb_out_lut_sse2;
1308 else
1309 transform->transform_fn = qcms_transform_dat a_rgba_out_lut_sse2;
1310
1311 #if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
1312 /* Microsoft Compiler for x64 doesn't support MMX.
1313 * SSE code uses MMX so that we disable on x64 */
1314 } else
1315 if (sse_version_available() >= 1) {
1316 if (in_type == QCMS_DATA_RGB_8)
1317 transform->transform_fn = qcms_transform_dat a_rgb_out_lut_sse1;
1318 else
1319 transform->transform_fn = qcms_transform_dat a_rgba_out_lut_sse1;
1320 #endif
1321 } else
1322 #endif
1323 {
1324 if (in_type == QCMS_DATA_RGB_8)
1325 transform->transform_fn = qcms_transform _data_rgb_out_lut_precache;
1326 else
1327 transform->transform_fn = qcms_transform _data_rgba_out_lut_precache;
1328 }
1329 } else {
1330 if (in_type == QCMS_DATA_RGB_8)
1331 transform->transform_fn = qcms_transform_data_rg b_out_lut;
1332 else
1333 transform->transform_fn = qcms_transform_data_rg ba_out_lut;
1334 }
1335
1336 //XXX: avoid duplicating tables if we can
1337 transform->input_gamma_table_r = build_input_gamma_table(in->red TRC);
1338 transform->input_gamma_table_g = build_input_gamma_table(in->gre enTRC);
1339 transform->input_gamma_table_b = build_input_gamma_table(in->blu eTRC);
1340 if (!transform->input_gamma_table_r || !transform->input_gamma_t able_g || !transform->input_gamma_table_b) {
1341 qcms_transform_release(transform);
1342 return NO_MEM_TRANSFORM;
1343 }
1344
1345
1346 /* build combined colorant matrix */
1347 in_matrix = build_colorant_matrix(in);
1348 out_matrix = build_colorant_matrix(out);
1349 out_matrix = matrix_invert(out_matrix);
1350 if (out_matrix.invalid) {
1351 qcms_transform_release(transform);
1352 return NULL;
1353 }
1354 result = matrix_multiply(out_matrix, in_matrix);
1355
1356 /* store the results in column major mode
1357 * this makes doing the multiplication with sse easier */
1358 transform->matrix[0][0] = result.m[0][0];
1359 transform->matrix[1][0] = result.m[0][1];
1360 transform->matrix[2][0] = result.m[0][2];
1361 transform->matrix[0][1] = result.m[1][0];
1362 transform->matrix[1][1] = result.m[1][1];
1363 transform->matrix[2][1] = result.m[1][2];
1364 transform->matrix[0][2] = result.m[2][0];
1365 transform->matrix[1][2] = result.m[2][1];
1366 transform->matrix[2][2] = result.m[2][2];
1367
1368 } else if (in->color_space == GRAY_SIGNATURE) {
1369 if (in_type != QCMS_DATA_GRAY_8 &&
1370 in_type != QCMS_DATA_GRAYA_8){
1371 assert(0 && "input type");
1372 transform_free(transform);
1373 return NULL;
1374 }
1375
1376 transform->input_gamma_table_gray = build_input_gamma_table(in-> grayTRC);
1377 if (!transform->input_gamma_table_gray) {
1378 qcms_transform_release(transform);
1379 return NO_MEM_TRANSFORM;
1380 }
1381
1382 if (precache) {
1383 if (in_type == QCMS_DATA_GRAY_8) {
1384 transform->transform_fn = qcms_transform_data_gr ay_out_precache;
1385 } else {
1386 transform->transform_fn = qcms_transform_data_gr aya_out_precache;
1387 }
1388 } else {
1389 if (in_type == QCMS_DATA_GRAY_8) {
1390 transform->transform_fn = qcms_transform_data_gr ay_out_lut;
1391 } else {
1392 transform->transform_fn = qcms_transform_data_gr aya_out_lut;
1393 }
1394 }
1395 } else {
1396 assert(0 && "unexpected colorspace");
1397 transform_free(transform);
1398 return NULL;
1399 }
1400 return transform;
1401 }
1402
1403 /* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
1404 * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the pr esence
1405 * of the attribute but is currently only supported by clang */
1406 #if defined(__has_attribute)
1407 #define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
1408 #elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defin ed(__arm__) && !defined(__mips__)
1409 #define HAS_FORCE_ALIGN_ARG_POINTER 1
1410 #else
1411 #define HAS_FORCE_ALIGN_ARG_POINTER 0
1412 #endif
1413
1414 #if HAS_FORCE_ALIGN_ARG_POINTER
1415 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1416 __attribute__((__force_align_arg_pointer__))
1417 #endif
1418 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_ t length)
1419 {
1420 static const struct _qcms_format_type output_rgbx = { 0, 2 };
1421
1422 transform->transform_fn(transform, src, dest, length, output_rgbx);
1423 }
1424
1425 void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
1426 {
1427 static const struct _qcms_format_type output_rgbx = { 0, 2 };
1428 static const struct _qcms_format_type output_bgrx = { 2, 0 };
1429
1430 transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPU T_BGRX ? output_bgrx : output_rgbx);
1431 }
1432
1433 qcms_bool qcms_supports_iccv4;
1434 void qcms_enable_iccv4()
1435 {
1436 qcms_supports_iccv4 = true;
1437 }
OLDNEW
« no previous file with comments | « third_party/qcms/src/qcmstypes.h ('k') | third_party/qcms/src/transform-sse1.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698