third_party/qcms/src/transform.c - Issue 9969111: Adds qcms to third_party for use in handling ICC color profiles.

Side by Side Diff: third_party/qcms/src/transform.c

Issue 9969111: Adds qcms to third_party for use in handling ICC color profiles. (Closed) Base URL: http://git.chromium.org/chromium/src.git@bug143

Patch Set: Moved downloaded src to third_party/qcms/src Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /* vim: set ts=8 sw=8 noexpandtab: */

	2 // qcms

	3 // Copyright (C) 2009 Mozilla Corporation

	4 // Copyright (C) 1998-2007 Marti Maria

	5 //

	6 // Permission is hereby granted, free of charge, to any person obtaining

	7 // a copy of this software and associated documentation files (the "Software"),

	8 // to deal in the Software without restriction, including without limitation

	9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,

	10 // and/or sell copies of the Software, and to permit persons to whom the Software

	11 // is furnished to do so, subject to the following conditions:

	12 //

	13 // The above copyright notice and this permission notice shall be included in

	14 // all copies or substantial portions of the Software.

	15 //

	16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

	17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO

	18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

	19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE

	20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

	21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

	22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	23

	24 #include <stdlib.h>

	25 #include <math.h>

	26 #include <assert.h>

	27 #include <string.h> //memcpy

	28 #include "qcmsint.h"

	29 #include "chain.h"

	30 #include "matrix.h"

	31 #include "transform_util.h"

	32

	/* Define X86 when building for 32- or 64-bit x86. The tests cover the
	 * predefined macros of the MSVC, GCC, Intel, and Sun compilers. */
	#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
	#define X86
	#endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */

	37

	38 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ

	39 // This is just an approximation, I am not handling all the non-linear

	40 // aspects of the RGB to XYZ process, and assumming that the gamma correction

	41 // has transitive property in the tranformation chain.

	42 //

	43 // the alghoritm:

	44 //

	45 // - First I build the absolute conversion matrix using

	46 // primaries in XYZ. This matrix is next inverted

	47 // - Then I eval the source white point across this matrix

	48 // obtaining the coeficients of the transformation

	49 // - Then, I apply these coeficients to the original matrix

	50 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_C IE_xyYTRIPLE primrs)

	51 {

	52 struct matrix primaries;

	53 struct matrix primaries_invert;

	54 struct matrix result;

	55 struct vector white_point;

	56 struct vector coefs;

	57

	58 double xn, yn;

	59 double xr, yr;

	60 double xg, yg;

	61 double xb, yb;

	62

	63 xn = white.x;

	64 yn = white.y;

	65

	66 if (yn == 0.0)

	67 return matrix_invalid();

	68

	69 xr = primrs.red.x;

	70 yr = primrs.red.y;

	71 xg = primrs.green.x;

	72 yg = primrs.green.y;

	73 xb = primrs.blue.x;

	74 yb = primrs.blue.y;

	75

	76 primaries.m[0][0] = xr;

	77 primaries.m[0][1] = xg;

	78 primaries.m[0][2] = xb;

	79

	80 primaries.m[1][0] = yr;

	81 primaries.m[1][1] = yg;

	82 primaries.m[1][2] = yb;

	83

	84 primaries.m[2][0] = 1 - xr - yr;

	85 primaries.m[2][1] = 1 - xg - yg;

	86 primaries.m[2][2] = 1 - xb - yb;

	87 primaries.invalid = false;

	88

	89 white_point.v[0] = xn/yn;

	90 white_point.v[1] = 1.;

	91 white_point.v[2] = (1.0-xn-yn)/yn;

	92

	93 primaries_invert = matrix_invert(primaries);

	94

	95 coefs = matrix_eval(primaries_invert, white_point);

	96

	97 result.m[0][0] = coefs.v[0]*xr;

	98 result.m[0][1] = coefs.v[1]*xg;

	99 result.m[0][2] = coefs.v[2]*xb;

	100

	101 result.m[1][0] = coefs.v[0]*yr;

	102 result.m[1][1] = coefs.v[1]*yg;

	103 result.m[1][2] = coefs.v[2]*yb;

	104

	105 result.m[2][0] = coefs.v[0]*(1.-xr-yr);

	106 result.m[2][1] = coefs.v[1]*(1.-xg-yg);

	107 result.m[2][2] = coefs.v[2]*(1.-xb-yb);

	108 result.invalid = primaries_invert.invalid;

	109

	110 return result;

	111 }

	112

	/* A colour expressed as CIE XYZ tristimulus values. */
	struct CIE_XYZ {
		double X;
		double Y;
		double Z;
	};

	/* CIE Illuminant D50 */
	static const struct CIE_XYZ D50_XYZ = {
		.X = 0.9642,
		.Y = 1.0000,
		.Z = 0.8249
	};

	125

	126 /* from lcms: xyY2XYZ()

	127 * corresponds to argyll: icmYxy2XYZ() */

	128 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)

	129 {

	130 struct CIE_XYZ dest;

	131 dest.X = (source.x / source.y) * source.Y;

	132 dest.Y = source.Y;

	133 dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;

	134 return dest;

	135 }

	136

	137 /* from lcms: ComputeChromaticAdaption */

	138 // Compute chromatic adaption matrix using chad as cone matrix

	139 static struct matrix

	140 compute_chromatic_adaption(struct CIE_XYZ source_white_point,

	141 struct CIE_XYZ dest_white_point,

	142 struct matrix chad)

	143 {

	144 struct matrix chad_inv;

	145 struct vector cone_source_XYZ, cone_source_rgb;

	146 struct vector cone_dest_XYZ, cone_dest_rgb;

	147 struct matrix cone, tmp;

	148

	149 tmp = chad;

	150 chad_inv = matrix_invert(tmp);

	151

	152 cone_source_XYZ.v[0] = source_white_point.X;

	153 cone_source_XYZ.v[1] = source_white_point.Y;

	154 cone_source_XYZ.v[2] = source_white_point.Z;

	155

	156 cone_dest_XYZ.v[0] = dest_white_point.X;

	157 cone_dest_XYZ.v[1] = dest_white_point.Y;

	158 cone_dest_XYZ.v[2] = dest_white_point.Z;

	159

	160 cone_source_rgb = matrix_eval(chad, cone_source_XYZ);

	161 cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ);

	162

	163 cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];

	164 cone.m[0][1] = 0;

	165 cone.m[0][2] = 0;

	166 cone.m[1][0] = 0;

	167 cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];

	168 cone.m[1][2] = 0;

	169 cone.m[2][0] = 0;

	170 cone.m[2][1] = 0;

	171 cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];

	172 cone.invalid = false;

	173

	174 // Normalize

	175 return matrix_multiply(chad_inv, matrix_multiply(cone, chad));

	176 }

	177

	178 /* from lcms: cmsAdaptionMatrix */

	179 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant T oIll

	180 // Bradford is assumed

	181 static struct matrix

	182 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumi nation)

	183 {

	184 struct matrix lam_rigg = {{ // Bradford matrix

	185 { 0.8951, 0.2664, -0.1614 },

	186 { -0.7502, 1.7135, 0.0367 },

	187 { 0.0389, -0.0685, 1.0296 }

	188 }};

	189 return compute_chromatic_adaption(source_illumination, target_illuminati on, lam_rigg);

	190 }

	191

	192 /* from lcms: cmsAdaptMatrixToD50 */

	193 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_wh ite_pt)

	194 {

	195 struct CIE_XYZ Dn;

	196 struct matrix Bradford;

	197

	198 if (source_white_pt.y == 0.0)

	199 return matrix_invalid();

	200

	201 Dn = xyY2XYZ(source_white_pt);

	202

	203 Bradford = adaption_matrix(Dn, D50_XYZ);

	204 return matrix_multiply(Bradford, r);

	205 }

	206

	207 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm s_CIE_xyYTRIPLE primaries)

	208 {

	209 struct matrix colorants;

	210 colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);

	211 colorants = adapt_matrix_to_D50(colorants, white_point);

	212

	213 if (colorants.invalid)

	214 return false;

	215

	216 /* note: there's a transpose type of operation going on here */

	217 profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);

	218 profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);

	219 profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);

	220

	221 profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]) ;

	222 profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]) ;

	223 profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]) ;

	224

	225 profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);

	226 profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);

	227 profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);

	228

	229 return true;

	230 }

	231

	#if 0
	/*
	 * Disabled variant: convert `length` RGB pixels, applying the output gamma
	 * with pow() instead of a lookup table. Kept out of the build by #if 0.
	 */
	static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
	{
		size_t i; /* matches the type of length; avoids a signed/unsigned comparison */
		float (*mat)[4] = transform->matrix;
		for (i = 0; i < length; i++) {
			unsigned char device_r = *src++;
			unsigned char device_g = *src++;
			unsigned char device_b = *src++;

			float linear_r = transform->input_gamma_table_r[device_r];
			float linear_g = transform->input_gamma_table_g[device_g];
			float linear_b = transform->input_gamma_table_b[device_b];

			float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
			float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
			float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

			float out_device_r = pow(out_linear_r, transform->out_gamma_r);
			float out_device_g = pow(out_linear_g, transform->out_gamma_g);
			float out_device_b = pow(out_linear_b, transform->out_gamma_b);

			*dest++ = clamp_u8(255*out_device_r);
			*dest++ = clamp_u8(255*out_device_g);
			*dest++ = clamp_u8(255*out_device_b);
		}
	}
	#endif

	260

	261 static void qcms_transform_data_gray_out_lut(qcms_transform transform, unsigned char src, unsigned char *dest, size_t length)

	262 {

	263 unsigned int i;

	264 for (i = 0; i < length; i++) {

	265 float out_device_r, out_device_g, out_device_b;

	266 unsigned char device = *src++;

	267

	268 float linear = transform->input_gamma_table_gray[device];

	269

	270 out_device_r = lut_interp_linear(linear, transform->output_gamma _lut_r, transform->output_gamma_lut_r_length);

	271 out_device_g = lut_interp_linear(linear, transform->output_gamma _lut_g, transform->output_gamma_lut_g_length);

	272 out_device_b = lut_interp_linear(linear, transform->output_gamma _lut_b, transform->output_gamma_lut_b_length);

	273

	274 dest++ = clamp_u8(out_device_r255);

	275 dest++ = clamp_u8(out_device_g255);

	276 dest++ = clamp_u8(out_device_b255);

	277 }

	278 }

	279

	280 /* Alpha is not corrected.

	281 A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If

	282 RGB Is?" Tech Memo 17 (December 14, 1998).

	283 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf

	284 */

	285

	286 static void qcms_transform_data_graya_out_lut(qcms_transform transform, unsigne d char src, unsigned char *dest, size_t length)

	287 {

	288 unsigned int i;

	289 for (i = 0; i < length; i++) {

	290 float out_device_r, out_device_g, out_device_b;

	291 unsigned char device = *src++;

	292 unsigned char alpha = *src++;

	293

	294 float linear = transform->input_gamma_table_gray[device];

	295

	296 out_device_r = lut_interp_linear(linear, transform->output_gamma _lut_r, transform->output_gamma_lut_r_length);

	297 out_device_g = lut_interp_linear(linear, transform->output_gamma _lut_g, transform->output_gamma_lut_g_length);

	298 out_device_b = lut_interp_linear(linear, transform->output_gamma _lut_b, transform->output_gamma_lut_b_length);

	299

	300 dest++ = clamp_u8(out_device_r255);

	301 dest++ = clamp_u8(out_device_g255);

	302 dest++ = clamp_u8(out_device_b255);

	303 *dest++ = alpha;

	304 }

	305 }

	306

	307

	308 static void qcms_transform_data_gray_out_precache(qcms_transform transform, uns igned char src, unsigned char *dest, size_t length)

	309 {

	310 unsigned int i;

	311 for (i = 0; i < length; i++) {

	312 unsigned char device = *src++;

	313 uint16_t gray;

	314

	315 float linear = transform->input_gamma_table_gray[device];

	316

	317 /* we could round here... */

	318 gray = linear * PRECACHE_OUTPUT_MAX;

	319

	320 *dest++ = transform->output_table_r->data[gray];

	321 *dest++ = transform->output_table_g->data[gray];

	322 *dest++ = transform->output_table_b->data[gray];

	323 }

	324 }

	325

	326 static void qcms_transform_data_graya_out_precache(qcms_transform transform, un signed char src, unsigned char *dest, size_t length)

	327 {

	328 unsigned int i;

	329 for (i = 0; i < length; i++) {

	330 unsigned char device = *src++;

	331 unsigned char alpha = *src++;

	332 uint16_t gray;

	333

	334 float linear = transform->input_gamma_table_gray[device];

	335

	336 /* we could round here... */

	337 gray = linear * PRECACHE_OUTPUT_MAX;

	338

	339 *dest++ = transform->output_table_r->data[gray];

	340 *dest++ = transform->output_table_g->data[gray];

	341 *dest++ = transform->output_table_b->data[gray];

	342 *dest++ = alpha;

	343 }

	344 }

	345

	346 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform transform, unsigned char src, unsigned char *dest, size_t length)

	347 {

	348 unsigned int i;

	349 float (*mat)[4] = transform->matrix;

	350 for (i = 0; i < length; i++) {

	351 unsigned char device_r = *src++;

	352 unsigned char device_g = *src++;

	353 unsigned char device_b = *src++;

	354 uint16_t r, g, b;

	355

	356 float linear_r = transform->input_gamma_table_r[device_r];

	357 float linear_g = transform->input_gamma_table_g[device_g];

	358 float linear_b = transform->input_gamma_table_b[device_b];

	359

	360 float out_linear_r = mat[0][0]linear_r + mat[1][0]linear_g + m at[2][0]*linear_b;

	361 float out_linear_g = mat[0][1]linear_r + mat[1][1]linear_g + m at[2][1]*linear_b;

	362 float out_linear_b = mat[0][2]linear_r + mat[1][2]linear_g + m at[2][2]*linear_b;

	363

	364 out_linear_r = clamp_float(out_linear_r);

	365 out_linear_g = clamp_float(out_linear_g);

	366 out_linear_b = clamp_float(out_linear_b);

	367

	368 /* we could round here... */

	369 r = out_linear_r * PRECACHE_OUTPUT_MAX;

	370 g = out_linear_g * PRECACHE_OUTPUT_MAX;

	371 b = out_linear_b * PRECACHE_OUTPUT_MAX;

	372

	373 *dest++ = transform->output_table_r->data[r];

	374 *dest++ = transform->output_table_g->data[g];

	375 *dest++ = transform->output_table_b->data[b];

	376 }

	377 }

	378

	379 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform transform, unsigned char src, unsigned char *dest, size_t length)

	380 {

	381 unsigned int i;

	382 float (*mat)[4] = transform->matrix;

	383 for (i = 0; i < length; i++) {

	384 unsigned char device_r = *src++;

	385 unsigned char device_g = *src++;

	386 unsigned char device_b = *src++;

	387 unsigned char alpha = *src++;

	388 uint16_t r, g, b;

	389

	390 float linear_r = transform->input_gamma_table_r[device_r];

	391 float linear_g = transform->input_gamma_table_g[device_g];

	392 float linear_b = transform->input_gamma_table_b[device_b];

	393

	394 float out_linear_r = mat[0][0]linear_r + mat[1][0]linear_g + m at[2][0]*linear_b;

	395 float out_linear_g = mat[0][1]linear_r + mat[1][1]linear_g + m at[2][1]*linear_b;

	396 float out_linear_b = mat[0][2]linear_r + mat[1][2]linear_g + m at[2][2]*linear_b;

	397

	398 out_linear_r = clamp_float(out_linear_r);

	399 out_linear_g = clamp_float(out_linear_g);

	400 out_linear_b = clamp_float(out_linear_b);

	401

	402 /* we could round here... */

	403 r = out_linear_r * PRECACHE_OUTPUT_MAX;

	404 g = out_linear_g * PRECACHE_OUTPUT_MAX;

	405 b = out_linear_b * PRECACHE_OUTPUT_MAX;

	406

	407 *dest++ = transform->output_table_r->data[r];

	408 *dest++ = transform->output_table_g->data[g];

	409 *dest++ = transform->output_table_b->data[b];

	410 *dest++ = alpha;

	411 }

	412 }

	413

	414 // Not used

	415 /*

	416 static void qcms_transform_data_clut(qcms_transform transform, unsigned char s rc, unsigned char *dest, size_t length) {

	417 unsigned int i;

	418 int xy_len = 1;

	419 int x_len = transform->grid_size;

	420 int len = x_len * x_len;

	421 float* r_table = transform->r_clut;

	422 float* g_table = transform->g_clut;

	423 float* b_table = transform->b_clut;

	424

	425 for (i = 0; i < length; i++) {

	426 unsigned char in_r = *src++;

	427 unsigned char in_g = *src++;

	428 unsigned char in_b = *src++;

	429 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = i n_b/255.0f;

	430

	431 int x = floor(linear_r * (transform->grid_size-1));

	432 int y = floor(linear_g * (transform->grid_size-1));

	433 int z = floor(linear_b * (transform->grid_size-1));

	434 int x_n = ceil(linear_r * (transform->grid_size-1));

	435 int y_n = ceil(linear_g * (transform->grid_size-1));

	436 int z_n = ceil(linear_b * (transform->grid_size-1));

	437 float x_d = linear_r * (transform->grid_size-1) - x;

	438 float y_d = linear_g * (transform->grid_size-1) - y;

	439 float z_d = linear_b * (transform->grid_size-1) - z;

	440

	441 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d) ;

	442 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);

	443 float r_y1 = lerp(r_x1, r_x2, y_d);

	444 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);

	445 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_ n), x_d);

	446 float r_y2 = lerp(r_x3, r_x4, y_d);

	447 float clut_r = lerp(r_y1, r_y2, z_d);

	448

	449 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d) ;

	450 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);

	451 float g_y1 = lerp(g_x1, g_x2, y_d);

	452 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);

	453 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_ n), x_d);

	454 float g_y2 = lerp(g_x3, g_x4, y_d);

	455 float clut_g = lerp(g_y1, g_y2, z_d);

	456

	457 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d) ;

	458 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);

	459 float b_y1 = lerp(b_x1, b_x2, y_d);

	460 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);

	461 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_ n), x_d);

	462 float b_y2 = lerp(b_x3, b_x4, y_d);

	463 float clut_b = lerp(b_y1, b_y2, z_d);

	464

	465 dest++ = clamp_u8(clut_r255.0f);

	466 dest++ = clamp_u8(clut_g255.0f);

	467 dest++ = clamp_u8(clut_b255.0f);

	468 }

	469 }

	470 */

	471

	472 // Using lcms' tetra interpolation algorithm.

	473 static void qcms_transform_data_tetra_clut_rgba(qcms_transform transform, unsig ned char src, unsigned char *dest, size_t length) {

	474 unsigned int i;

	475 int xy_len = 1;

	476 int x_len = transform->grid_size;

	477 int len = x_len * x_len;

	478 float* r_table = transform->r_clut;

	479 float* g_table = transform->g_clut;

	480 float* b_table = transform->b_clut;

	481 float c0_r, c1_r, c2_r, c3_r;

	482 float c0_g, c1_g, c2_g, c3_g;

	483 float c0_b, c1_b, c2_b, c3_b;

	484 float clut_r, clut_g, clut_b;

	485 for (i = 0; i < length; i++) {

	486 unsigned char in_r = *src++;

	487 unsigned char in_g = *src++;

	488 unsigned char in_b = *src++;

	489 unsigned char in_a = *src++;

	490 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = i n_b/255.0f;

	491

	492 int x = floor(linear_r * (transform->grid_size-1));

	493 int y = floor(linear_g * (transform->grid_size-1));

	494 int z = floor(linear_b * (transform->grid_size-1));

	495 int x_n = ceil(linear_r * (transform->grid_size-1));

	496 int y_n = ceil(linear_g * (transform->grid_size-1));

	497 int z_n = ceil(linear_b * (transform->grid_size-1));

	498 float rx = linear_r * (transform->grid_size-1) - x;

	499 float ry = linear_g * (transform->grid_size-1) - y;

	500 float rz = linear_b * (transform->grid_size-1) - z;

	501

	502 c0_r = CLU(r_table, x, y, z);

	503 c0_g = CLU(g_table, x, y, z);

	504 c0_b = CLU(b_table, x, y, z);

	505

	506 if( rx >= ry ) {

	507 if (ry >= rz) { //rx >= ry && ry >= rz

	508 c1_r = CLU(r_table, x_n, y, z) - c0_r;

	509 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);

	510 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);

	511 c1_g = CLU(g_table, x_n, y, z) - c0_g;

	512 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);

	513 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);

	514 c1_b = CLU(b_table, x_n, y, z) - c0_b;

	515 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);

	516 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);

	517 } else {

	518 if (rx >= rz) { //rx >= rz && rz >= ry

	519 c1_r = CLU(r_table, x_n, y, z) - c0_r;

	520 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);

	521 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x_n, y, z);

	522 c1_g = CLU(g_table, x_n, y, z) - c0_g;

	523 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);

	524 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x_n, y, z);

	525 c1_b = CLU(b_table, x_n, y, z) - c0_b;

	526 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);

	527 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x_n, y, z);

	528 } else { //rz > rx && rx >= ry

	529 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x, y, z_n);

	530 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);

	531 c3_r = CLU(r_table, x, y, z_n) - c0_r;

	532 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x, y, z_n);

	533 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);

	534 c3_g = CLU(g_table, x, y, z_n) - c0_g;

	535 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x, y, z_n);

	536 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);

	537 c3_b = CLU(b_table, x, y, z_n) - c0_b;

	538 }

	539 }

	540 } else {

	541 if (rx >= rz) { //ry > rx && rx >= rz

	542 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);

	543 c2_r = CLU(r_table, x, y_n, z) - c0_r;

	544 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);

	545 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);

	546 c2_g = CLU(g_table, x, y_n, z) - c0_g;

	547 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);

	548 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);

	549 c2_b = CLU(b_table, x, y_n, z) - c0_b;

	550 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);

	551 } else {

	552 if (ry >= rz) { //ry >= rz && rz > rx

	553 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);

	554 c2_r = CLU(r_table, x, y_n, z) - c0_r;

	555 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y_n, z);

	556 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);

	557 c2_g = CLU(g_table, x, y_n, z) - c0_g;

	558 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y_n, z);

	559 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);

	560 c2_b = CLU(b_table, x, y_n, z) - c0_b;

	561 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y_n, z);

	562 } else { //rz > ry && ry > rx

	563 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);

	564 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y, z_n);

	565 c3_r = CLU(r_table, x, y, z_n) - c0_r;

	566 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);

	567 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y, z_n);

	568 c3_g = CLU(g_table, x, y, z_n) - c0_g;

	569 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);

	570 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y, z_n);

	571 c3_b = CLU(b_table, x, y, z_n) - c0_b;

	572 }

	573 }

	574 }

	575

	576 clut_r = c0_r + c1_rrx + c2_rry + c3_r*rz;

	577 clut_g = c0_g + c1_grx + c2_gry + c3_g*rz;

	578 clut_b = c0_b + c1_brx + c2_bry + c3_b*rz;

	579

	580 dest++ = clamp_u8(clut_r255.0f);

	581 dest++ = clamp_u8(clut_g255.0f);

	582 dest++ = clamp_u8(clut_b255.0f);

	583 *dest++ = in_a;

	584 }

	585 }

	586

	587 // Using lcms' tetra interpolation code.

	588 static void qcms_transform_data_tetra_clut(qcms_transform transform, unsigned c har src, unsigned char *dest, size_t length) {

	589 unsigned int i;

	590 int xy_len = 1;

	591 int x_len = transform->grid_size;

	592 int len = x_len * x_len;

	593 float* r_table = transform->r_clut;

	594 float* g_table = transform->g_clut;

	595 float* b_table = transform->b_clut;

	596 float c0_r, c1_r, c2_r, c3_r;

	597 float c0_g, c1_g, c2_g, c3_g;

	598 float c0_b, c1_b, c2_b, c3_b;

	599 float clut_r, clut_g, clut_b;

	600 for (i = 0; i < length; i++) {

	601 unsigned char in_r = *src++;

	602 unsigned char in_g = *src++;

	603 unsigned char in_b = *src++;

	604 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = i n_b/255.0f;

	605

	606 int x = floor(linear_r * (transform->grid_size-1));

	607 int y = floor(linear_g * (transform->grid_size-1));

	608 int z = floor(linear_b * (transform->grid_size-1));

	609 int x_n = ceil(linear_r * (transform->grid_size-1));

	610 int y_n = ceil(linear_g * (transform->grid_size-1));

	611 int z_n = ceil(linear_b * (transform->grid_size-1));

	612 float rx = linear_r * (transform->grid_size-1) - x;

	613 float ry = linear_g * (transform->grid_size-1) - y;

	614 float rz = linear_b * (transform->grid_size-1) - z;

	615

	616 c0_r = CLU(r_table, x, y, z);

	617 c0_g = CLU(g_table, x, y, z);

	618 c0_b = CLU(b_table, x, y, z);

	619

	620 if( rx >= ry ) {

	621 if (ry >= rz) { //rx >= ry && ry >= rz

	622 c1_r = CLU(r_table, x_n, y, z) - c0_r;

	623 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);

	624 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);

	625 c1_g = CLU(g_table, x_n, y, z) - c0_g;

	626 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);

	627 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);

	628 c1_b = CLU(b_table, x_n, y, z) - c0_b;

	629 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);

	630 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);

	631 } else {

	632 if (rx >= rz) { //rx >= rz && rz >= ry

	633 c1_r = CLU(r_table, x_n, y, z) - c0_r;

	634 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);

	635 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x_n, y, z);

	636 c1_g = CLU(g_table, x_n, y, z) - c0_g;

	637 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);

	638 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x_n, y, z);

	639 c1_b = CLU(b_table, x_n, y, z) - c0_b;

	640 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);

	641 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x_n, y, z);

	642 } else { //rz > rx && rx >= ry

	643 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r _table, x, y, z_n);

	644 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x_n, y, z_n);

	645 c3_r = CLU(r_table, x, y, z_n) - c0_r;

	646 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g _table, x, y, z_n);

	647 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x_n, y, z_n);

	648 c3_g = CLU(g_table, x, y, z_n) - c0_g;

	649 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b _table, x, y, z_n);

	650 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x_n, y, z_n);

	651 c3_b = CLU(b_table, x, y, z_n) - c0_b;

	652 }

	653 }

	654 } else {

	655 if (rx >= rz) { //ry > rx && rx >= rz

	656 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);

	657 c2_r = CLU(r_table, x, y_n, z) - c0_r;

	658 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table , x_n, y_n, z);

	659 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);

	660 c2_g = CLU(g_table, x, y_n, z) - c0_g;

	661 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table , x_n, y_n, z);

	662 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);

	663 c2_b = CLU(b_table, x, y_n, z) - c0_b;

	664 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table , x_n, y_n, z);

	665 } else {

	666 if (ry >= rz) { //ry >= rz && rz > rx

	667 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);

	668 c2_r = CLU(r_table, x, y_n, z) - c0_r;

	669 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y_n, z);

	670 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);

	671 c2_g = CLU(g_table, x, y_n, z) - c0_g;

	672 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y_n, z);

	673 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);

	674 c2_b = CLU(b_table, x, y_n, z) - c0_b;

	675 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y_n, z);

	676 } else { //rz > ry && ry > rx

	677 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU (r_table, x, y_n, z_n);

	678 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r _table, x, y, z_n);

	679 c3_r = CLU(r_table, x, y, z_n) - c0_r;

	680 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU (g_table, x, y_n, z_n);

	681 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g _table, x, y, z_n);

	682 c3_g = CLU(g_table, x, y, z_n) - c0_g;

	683 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU (b_table, x, y_n, z_n);

	684 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b _table, x, y, z_n);

	685 c3_b = CLU(b_table, x, y, z_n) - c0_b;

	686 }

	687 }

	688 }

	689

	690 clut_r = c0_r + c1_rrx + c2_rry + c3_r*rz;

	691 clut_g = c0_g + c1_grx + c2_gry + c3_g*rz;

	692 clut_b = c0_b + c1_brx + c2_bry + c3_b*rz;

	693

	694 dest++ = clamp_u8(clut_r255.0f);

	695 dest++ = clamp_u8(clut_g255.0f);

	696 dest++ = clamp_u8(clut_b255.0f);

	697 }

	698 }

	699

	700 static void qcms_transform_data_rgb_out_lut(qcms_transform transform, unsigned char src, unsigned char *dest, size_t length)

	701 {

	702 unsigned int i;

	703 float (*mat)[4] = transform->matrix;

	704 for (i = 0; i < length; i++) {

	705 unsigned char device_r = *src++;

	706 unsigned char device_g = *src++;

	707 unsigned char device_b = *src++;

	708 float out_device_r, out_device_g, out_device_b;

	709

	710 float linear_r = transform->input_gamma_table_r[device_r];

	711 float linear_g = transform->input_gamma_table_g[device_g];

	712 float linear_b = transform->input_gamma_table_b[device_b];

	713

	714 float out_linear_r = mat[0][0]linear_r + mat[1][0]linear_g + m at[2][0]*linear_b;

	715 float out_linear_g = mat[0][1]linear_r + mat[1][1]linear_g + m at[2][1]*linear_b;

	716 float out_linear_b = mat[0][2]linear_r + mat[1][2]linear_g + m at[2][2]*linear_b;

	717

	718 out_linear_r = clamp_float(out_linear_r);

	719 out_linear_g = clamp_float(out_linear_g);

	720 out_linear_b = clamp_float(out_linear_b);

	721

	722 out_device_r = lut_interp_linear(out_linear_r,

	723 transform->output_gamma_lut_r, transform->output _gamma_lut_r_length);

	724 out_device_g = lut_interp_linear(out_linear_g,

	725 transform->output_gamma_lut_g, transform->output _gamma_lut_g_length);

	726 out_device_b = lut_interp_linear(out_linear_b,

	727 transform->output_gamma_lut_b, transform->output _gamma_lut_b_length);

	728

	729 dest++ = clamp_u8(out_device_r255);

	730 dest++ = clamp_u8(out_device_g255);

	731 dest++ = clamp_u8(out_device_b255);

	732 }

	733 }

	734

	735 static void qcms_transform_data_rgba_out_lut(qcms_transform transform, unsigned char src, unsigned char *dest, size_t length)

	736 {

	737 unsigned int i;

	738 float (*mat)[4] = transform->matrix;

	739 for (i = 0; i < length; i++) {

	740 unsigned char device_r = *src++;

	741 unsigned char device_g = *src++;

	742 unsigned char device_b = *src++;

	743 unsigned char alpha = *src++;

	744 float out_device_r, out_device_g, out_device_b;

	745

	746 float linear_r = transform->input_gamma_table_r[device_r];

	747 float linear_g = transform->input_gamma_table_g[device_g];

	748 float linear_b = transform->input_gamma_table_b[device_b];

	749

	750 float out_linear_r = mat[0][0]linear_r + mat[1][0]linear_g + m at[2][0]*linear_b;

	751 float out_linear_g = mat[0][1]linear_r + mat[1][1]linear_g + m at[2][1]*linear_b;

	752 float out_linear_b = mat[0][2]linear_r + mat[1][2]linear_g + m at[2][2]*linear_b;

	753

	754 out_linear_r = clamp_float(out_linear_r);

	755 out_linear_g = clamp_float(out_linear_g);

	756 out_linear_b = clamp_float(out_linear_b);

	757

	758 out_device_r = lut_interp_linear(out_linear_r,

	759 transform->output_gamma_lut_r, transform->output _gamma_lut_r_length);

	760 out_device_g = lut_interp_linear(out_linear_g,

	761 transform->output_gamma_lut_g, transform->output _gamma_lut_g_length);

	762 out_device_b = lut_interp_linear(out_linear_b,

	763 transform->output_gamma_lut_b, transform->output _gamma_lut_b_length);

	764

	765 dest++ = clamp_u8(out_device_r255);

	766 dest++ = clamp_u8(out_device_g255);

	767 dest++ = clamp_u8(out_device_b255);

	768 *dest++ = alpha;

	769 }

	770 }

	771

	#if 0
	/*
	 * Compiled out. Variant of the RGB path that skips the output gamma LUT:
	 * the matrix result is scaled by 255 and narrowed with clamp_u8() directly.
	 */
	static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
	{
		/* unsigned and as wide as `length`: avoids a signed/unsigned comparison */
		size_t i;
		float (*mat)[4] = transform->matrix;
		for (i = 0; i < length; i++) {
			unsigned char device_r = *src++;
			unsigned char device_g = *src++;
			unsigned char device_b = *src++;

			float linear_r = transform->input_gamma_table_r[device_r];
			float linear_g = transform->input_gamma_table_g[device_g];
			float linear_b = transform->input_gamma_table_b[device_b];

			float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
			float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
			float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

			*dest++ = clamp_u8(out_linear_r*255);
			*dest++ = clamp_u8(out_linear_g*255);
			*dest++ = clamp_u8(out_linear_b*255);
		}
	}
	#endif

	796

	797 static struct precache_output precache_reference(struct precache_output p)

	798 {

	799 p->ref_count++;

	800 return p;

	801 }

	802

	803 static struct precache_output *precache_create()

	804 {

	805 struct precache_output *p = malloc(sizeof(struct precache_output));

	806 if (p)

	807 p->ref_count = 1;

	808 return p;

	809 }

	810

	811 void precache_release(struct precache_output *p)

	812 {

	813 if (--p->ref_count == 0) {

	814 free(p);

	815 }

	816 }

	817

	818 #ifdef HAS_POSIX_MEMALIGN

	819 static qcms_transform *transform_alloc(void)

	820 {

	821 qcms_transform *t;

	822 if (!posix_memalign(&t, 16, sizeof(*t))) {

	823 return t;

	824 } else {

	825 return NULL;

	826 }

	827 }

	828 static void transform_free(qcms_transform *t)

	829 {

	830 free(t);

	831 }

	832 #else

	833 static qcms_transform *transform_alloc(void)

	834 {

	835 /* transform needs to be aligned on a 16byte boundrary */

	836 char original_block = calloc(sizeof(qcms_transform) + sizeof(void) + 1 6, 1);

	837 /* make room for a pointer to the block returned by calloc */

	838 void transform_start = original_block + sizeof(void);

	839 /* align transform_start */

	840 qcms_transform transform_aligned = (qcms_transform)(((uintptr_t)transf orm_start + 15) & ~0xf);

	841

	842 /* store a pointer to the block returned by calloc so that we can free i t later */

	843 void (original_block_ptr) = (void)transform_aligned;

	844 if (!original_block)

	845 return NULL;

	846 original_block_ptr--;

	847 *original_block_ptr = original_block;

	848

	849 return transform_aligned;

	850 }

	851 static void transform_free(qcms_transform *t)

	852 {

	853 /* get at the pointer to the unaligned block returned by calloc */

	854 void p = (void)t;

	855 p--;

	856 free(*p);

	857 }

	858 #endif

	859

	860 void qcms_transform_release(qcms_transform *t)

	861 {

	862 /* ensure we only free the gamma tables once even if there are

	863 * multiple references to the same data */

	864

	865 if (t->output_table_r)

	866 precache_release(t->output_table_r);

	867 if (t->output_table_g)

	868 precache_release(t->output_table_g);

	869 if (t->output_table_b)

	870 precache_release(t->output_table_b);

	871

	872 free(t->input_gamma_table_r);

	873 if (t->input_gamma_table_g != t->input_gamma_table_r)

	874 free(t->input_gamma_table_g);

	875 if (t->input_gamma_table_g != t->input_gamma_table_r &&

	876 t->input_gamma_table_g != t->input_gamma_table_b)

	877 free(t->input_gamma_table_b);

	878

	879 free(t->input_gamma_table_gray);

	880

	881 free(t->output_gamma_lut_r);

	882 free(t->output_gamma_lut_g);

	883 free(t->output_gamma_lut_b);

	884

	885 transform_free(t);

	886 }

	887

	888 #ifdef X86

	889 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h i n

	890 // mozilla/jpeg)

	891 // -------------------------------------------------------------------------

	892 #if defined(_M_IX86) && defined(_MSC_VER)

	893 #define HAS_CPUID

	894 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC

	895 register - I'm not sure if that ever happens on windows, but cpuid isn't

	896 on the critical path so we just preserve the register to be safe and to be

	897 consistent with the non-windows version. */

	898 static void cpuid(uint32_t fxn, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {

	899 uint32_t a_, b_, c_, d_;

	900 __asm {

	901 xchg ebx, esi

	902 mov eax, fxn

	903 cpuid

	904 mov a_, eax

	905 mov b_, ebx

	906 mov c_, ecx

	907 mov d_, edx

	908 xchg ebx, esi

	909 }

	910 *a = a_;

	911 *b = b_;

	912 *c = c_;

	913 *d = d_;

	914 }

	915 #elif (defined(__GNUC__) \|\| defined(__SUNPRO_C)) && (defined(__i386__) \|\| define d(__i386))

	916 #define HAS_CPUID

	/* CPUID wrapper for 32-bit GCC / Sun C builds: runs the instruction with
	   EAX = fxn and stores the resulting EAX/EBX/ECX/EDX through a/b/c/d.
	   ebx cannot be named as an output because it is the PIC register on some
	   platforms, so it is swapped with ESI around the instruction; the EBX result
	   is therefore read back from ESI and ebx itself is left as it was. */
	static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {

		uint32_t out_a, out_b, out_c, out_d;
		__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
				      : "=a" (out_a), "=S" (out_b), "=c" (out_c), "=d" (out_d)
				      : "a" (fxn));
		*a = out_a;
		*b = out_b;
		*c = out_c;
		*d = out_d;
	}

	929 #endif

	930

	931 // -------------------------Runtime SSEx Detection-----------------------------

	932

	/* MMX is always supported per
	 * Gecko v1.9.1 minimum CPU requirements */
	#define SSE1_EDX_MASK (1UL << 25)
	#define SSE2_EDX_MASK (1UL << 26)
	#define SSE3_ECX_MASK (1UL <<  0)

	/*
	 * Report the highest SSE level this function knows how to detect:
	 * 3, 2, 1, or 0 when none is found or no detection method is compiled in.
	 */
	static int sse_version_available(void)
	{
	#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
		/* we know at build time that 64-bit CPUs always have SSE2;
		 * this tells the compiler that non-SSE2 branches will never be
		 * taken (i.e. OK to optimize away the SSE1 and non-SIMD code) */
		return 2;
	#elif defined(HAS_CPUID)
		/* -1 means "not probed yet"; the answer is cached after the first call */
		static int sse_version = -1;
		uint32_t a, b, c, d;
		uint32_t function = 0x00000001;

		if (sse_version != -1)
			return sse_version;

		cpuid(function, &a, &b, &c, &d);
		if (c & SSE3_ECX_MASK)
			sse_version = 3;
		else if (d & SSE2_EDX_MASK)
			sse_version = 2;
		else if (d & SSE1_EDX_MASK)
			sse_version = 1;
		else
			sse_version = 0;

		return sse_version;
	#else
		return 0;
	#endif
	}

	967 #endif

	968

	969 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},

	970 {-0.7502f, 1.7135f, 0.0367f},

	971 { 0.0389f,-0.0685f, 1.0296f}},

	972 false};

	973

	974 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0. 1599627f},

	975 { 0.4323053f, 0.5183603f, 0. 0492912f},

	976 {-0.0085287f, 0.0400428f, 0. 9684867f}},

	977 false};

	978

	979 // See ICCv4 E.3

	980 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {

	981 float p = (0.96422fbradford_matrix.m[0][0] + 1.000fbradford_matrix.m[1 ][0] + 0.82521f*bradford_matrix.m[2][0]) /

	982 (Xbradford_matrix.m[0][0] + Ybradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] );

	983 float y = (0.96422fbradford_matrix.m[0][1] + 1.000fbradford_matrix.m[1 ][1] + 0.82521f*bradford_matrix.m[2][1]) /

	984 (Xbradford_matrix.m[0][1] + Ybradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] );

	985 float b = (0.96422fbradford_matrix.m[0][2] + 1.000fbradford_matrix.m[1 ][2] + 0.82521f*bradford_matrix.m[2][2]) /

	986 (Xbradford_matrix.m[0][2] + Ybradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] );

	987 struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};

	988 return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adapt ion, bradford_matrix) );

	989 }

	990

	991 void qcms_profile_precache_output_transform(qcms_profile *profile)

	992 {

	993 /* we only support precaching on rgb profiles */

	994 if (profile->color_space != RGB_SIGNATURE)

	995 return;

	996

	997 /* don't precache since we will use the B2A LUT */

	998 if (profile->B2A0)

	999 return;

	1000

	1001 /* don't precache since we will use the mBA LUT */

	1002 if (profile->mBA)

	1003 return;

	1004

	1005 /* don't precache if we do not have the TRC curves */

	1006 if (!profile->redTRC \|\| !profile->greenTRC \|\| !profile->blueTRC)

	1007 return;

	1008

	1009 if (!profile->output_table_r) {

	1010 profile->output_table_r = precache_create();

	1011 if (profile->output_table_r &&

	1012 !compute_precache(profile->redTRC, profile->outp ut_table_r->data)) {

	1013 precache_release(profile->output_table_r);

	1014 profile->output_table_r = NULL;

	1015 }

	1016 }

	1017 if (!profile->output_table_g) {

	1018 profile->output_table_g = precache_create();

	1019 if (profile->output_table_g &&

	1020 !compute_precache(profile->greenTRC, profile->ou tput_table_g->data)) {

	1021 precache_release(profile->output_table_g);

	1022 profile->output_table_g = NULL;

	1023 }

	1024 }

	1025 if (!profile->output_table_b) {

	1026 profile->output_table_b = precache_create();

	1027 if (profile->output_table_b &&

	1028 !compute_precache(profile->blueTRC, profile->out put_table_b->data)) {

	1029 precache_release(profile->output_table_b);

	1030 profile->output_table_b = NULL;

	1031 }

	1032 }

	1033 }

	1034

	1035 /* Replace the current transformation with a LUT transformation using a given nu mber of sample points */

	1036 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform transform, qcms _profile in, qcms_profile *out,

	1037 int samples, qcms_data_type in_ type)

	1038 {

	1039 /* The range between which 2 consecutive sample points can be used to in terpolate */

	1040 uint16_t x,y,z;

	1041 uint32_t l;

	1042 uint32_t lutSize = 3 * samples * samples * samples;

	1043 float* src = NULL;

	1044 float* dest = NULL;

	1045 float* lut = NULL;

	1046

	1047 src = malloc(lutSize*sizeof(float));

	1048 dest = malloc(lutSize*sizeof(float));

	1049

	1050 if (src && dest) {

	1051 /* Prepare a list of points we want to sample */

	1052 l = 0;

	1053 for (x = 0; x < samples; x++) {

	1054 for (y = 0; y < samples; y++) {

	1055 for (z = 0; z < samples; z++) {

	1056 src[l++] = x / (float)(samples-1);

	1057 src[l++] = y / (float)(samples-1);

	1058 src[l++] = z / (float)(samples-1);

	1059 }

	1060 }

	1061 }

	1062

	1063 lut = qcms_chain_transform(in, out, src, dest, lutSize);

	1064 if (lut) {

	1065 transform->r_clut = &lut[0];

	1066 transform->g_clut = &lut[1];

	1067 transform->b_clut = &lut[2];

	1068 transform->grid_size = samples;

	1069 if (in_type == QCMS_DATA_RGBA_8) {

	1070 transform->transform_fn = qcms_transform_data_te tra_clut_rgba;

	1071 } else {

	1072 transform->transform_fn = qcms_transform_data_te tra_clut;

	1073 }

	1074 }

	1075 }

	1076

	1077

	1078 //XXX: qcms_modular_transform_data may return either the src or dest buf fer. If so it must not be free-ed

	1079 if (src && lut != src) {

	1080 free(src);

	1081 } else if (dest && lut != src) {

	1082 free(dest);

	1083 }

	1084

	1085 if (lut == NULL) {

	1086 return NULL;

	1087 }

	1088 return transform;

	1089 }

	1090

	1091 #define NO_MEM_TRANSFORM NULL

	1092

	1093 qcms_transform* qcms_transform_create(

	1094 qcms_profile *in, qcms_data_type in_type,

	1095 qcms_profile *out, qcms_data_type out_type,

	1096 qcms_intent intent)

	1097 {

	1098 bool precache = false;

	1099

	1100 qcms_transform *transform = transform_alloc();

	1101 if (!transform) {

	1102 return NULL;

	1103 }

	1104 if (out_type != QCMS_DATA_RGB_8 &&

	1105 out_type != QCMS_DATA_RGBA_8) {

	1106 assert(0 && "output type");

	1107 transform_free(transform);

	1108 return NULL;

	1109 }

	1110

	1111 if (out->output_table_r &&

	1112 out->output_table_g &&

	1113 out->output_table_b) {

	1114 precache = true;

	1115 }

	1116

	1117 if (qcms_supports_iccv4 && (in->A2B0 \|\| out->B2A0 \|\| in->mAB \|\| out->mAB )) {

	1118 // Precache the transformation to a CLUT 33x33x33 in size.

	1119 // 33 is used by many profiles and works well in pratice.

	1120 // This evenly divides 256 into blocks of 8x8x8.

	1121 // TODO For transforming small data sets of about 200x200 or les s

	1122 // precaching should be avoided.

	1123 qcms_transform *result = qcms_transform_precacheLUT_float(transf orm, in, out, 33, in_type);

	1124 if (!result) {

	1125 assert(0 && "precacheLUT failed");

	1126 transform_free(transform);

	1127 return NULL;

	1128 }

	1129 return result;

	1130 }

	1131

	1132 if (precache) {

	1133 transform->output_table_r = precache_reference(out->output_table _r);

	1134 transform->output_table_g = precache_reference(out->output_table _g);

	1135 transform->output_table_b = precache_reference(out->output_table _b);

	1136 } else {

	1137 if (!out->redTRC \|\| !out->greenTRC \|\| !out->blueTRC) {

	1138 qcms_transform_release(transform);

	1139 return NO_MEM_TRANSFORM;

	1140 }

	1141 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &t ransform->output_gamma_lut_r_length);

	1142 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);

	1143 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, & transform->output_gamma_lut_b_length);

	1144 if (!transform->output_gamma_lut_r \|\| !transform->output_gamma_l ut_g \|\| !transform->output_gamma_lut_b) {

	1145 qcms_transform_release(transform);

	1146 return NO_MEM_TRANSFORM;

	1147 }

	1148 }

	1149

	1150 if (in->color_space == RGB_SIGNATURE) {

	1151 struct matrix in_matrix, out_matrix, result;

	1152

	1153 if (in_type != QCMS_DATA_RGB_8 &&

	1154 in_type != QCMS_DATA_RGBA_8){

	1155 assert(0 && "input type");

	1156 transform_free(transform);

	1157 return NULL;

	1158 }

	1159 if (precache) {

	1160 #ifdef X86

	1161 if (sse_version_available() >= 2) {

	1162 if (in_type == QCMS_DATA_RGB_8)

	1163 transform->transform_fn = qcms_transform_dat a_rgb_out_lut_sse2;

	1164 else

	1165 transform->transform_fn = qcms_transform_dat a_rgba_out_lut_sse2;

	1166

	1167 #if !(defined(_MSC_VER) && defined(_M_AMD64))

	1168 /* Microsoft Compiler for x64 doesn't support MMX.

	1169 * SSE code uses MMX so that we disable on x64 */

	1170 } else

	1171 if (sse_version_available() >= 1) {

	1172 if (in_type == QCMS_DATA_RGB_8)

	1173 transform->transform_fn = qcms_transform_dat a_rgb_out_lut_sse1;

	1174 else

	1175 transform->transform_fn = qcms_transform_dat a_rgba_out_lut_sse1;

	1176 #endif

	1177 } else

	1178 #endif

	1179 {

	1180 if (in_type == QCMS_DATA_RGB_8)

	1181 transform->transform_fn = qcms_transform _data_rgb_out_lut_precache;

	1182 else

	1183 transform->transform_fn = qcms_transform _data_rgba_out_lut_precache;

	1184 }

	1185 } else {

	1186 if (in_type == QCMS_DATA_RGB_8)

	1187 transform->transform_fn = qcms_transform_data_rg b_out_lut;

	1188 else

	1189 transform->transform_fn = qcms_transform_data_rg ba_out_lut;

	1190 }

	1191

	1192 //XXX: avoid duplicating tables if we can

	1193 transform->input_gamma_table_r = build_input_gamma_table(in->red TRC);

	1194 transform->input_gamma_table_g = build_input_gamma_table(in->gre enTRC);

	1195 transform->input_gamma_table_b = build_input_gamma_table(in->blu eTRC);

	1196 if (!transform->input_gamma_table_r \|\| !transform->input_gamma_t able_g \|\| !transform->input_gamma_table_b) {

	1197 qcms_transform_release(transform);

	1198 return NO_MEM_TRANSFORM;

	1199 }

	1200

	1201

	1202 /* build combined colorant matrix */

	1203 in_matrix = build_colorant_matrix(in);

	1204 out_matrix = build_colorant_matrix(out);

	1205 out_matrix = matrix_invert(out_matrix);

	1206 if (out_matrix.invalid) {

	1207 qcms_transform_release(transform);

	1208 return NULL;

	1209 }

	1210 result = matrix_multiply(out_matrix, in_matrix);

	1211

	1212 /* store the results in column major mode

	1213 * this makes doing the multiplication with sse easier */

	1214 transform->matrix[0][0] = result.m[0][0];

	1215 transform->matrix[1][0] = result.m[0][1];

	1216 transform->matrix[2][0] = result.m[0][2];

	1217 transform->matrix[0][1] = result.m[1][0];

	1218 transform->matrix[1][1] = result.m[1][1];

	1219 transform->matrix[2][1] = result.m[1][2];

	1220 transform->matrix[0][2] = result.m[2][0];

	1221 transform->matrix[1][2] = result.m[2][1];

	1222 transform->matrix[2][2] = result.m[2][2];

	1223

	1224 } else if (in->color_space == GRAY_SIGNATURE) {

	1225 if (in_type != QCMS_DATA_GRAY_8 &&

	1226 in_type != QCMS_DATA_GRAYA_8){

	1227 assert(0 && "input type");

	1228 transform_free(transform);

	1229 return NULL;

	1230 }

	1231

	1232 transform->input_gamma_table_gray = build_input_gamma_table(in-> grayTRC);

	1233 if (!transform->input_gamma_table_gray) {

	1234 qcms_transform_release(transform);

	1235 return NO_MEM_TRANSFORM;

	1236 }

	1237

	1238 if (precache) {

	1239 if (in_type == QCMS_DATA_GRAY_8) {

	1240 transform->transform_fn = qcms_transform_data_gr ay_out_precache;

	1241 } else {

	1242 transform->transform_fn = qcms_transform_data_gr aya_out_precache;

	1243 }

	1244 } else {

	1245 if (in_type == QCMS_DATA_GRAY_8) {

	1246 transform->transform_fn = qcms_transform_data_gr ay_out_lut;

	1247 } else {

	1248 transform->transform_fn = qcms_transform_data_gr aya_out_lut;

	1249 }

	1250 }

	1251 } else {

	1252 assert(0 && "unexpected colorspace");

	1253 transform_free(transform);

	1254 return NULL;

	1255 }

	1256 return transform;

	1257 }

	1258

	1259 #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)

	1260 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */

	1261 __attribute__((__force_align_arg_pointer__))

	1262 #endif

	1263 void qcms_transform_data(qcms_transform transform, void src, void *dest, size_ t length)

	1264 {

	1265 transform->transform_fn(transform, src, dest, length);

	1266 }

	1267

	1268 qcms_bool qcms_supports_iccv4;

	1269 void qcms_enable_iccv4()

	1270 {

	1271 qcms_supports_iccv4 = true;

	1272 }

OLD	NEW

« no previous file with comments | « third_party/qcms/src/qcmstypes.h ('k') | third_party/qcms/src/transform-sse1.c » ('j') | no next file with comments »