Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: third_party/qcms/src/transform-sse2.c

Issue 1414493004: [qcms] Reduce RMS color error for qcms_transform_data_rgb(a)_out_lut (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/qcms/README.chromium ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // qcms 1 // qcms
2 // Copyright (C) 2009 Mozilla Foundation 2 // Copyright (C) 2009 Mozilla Foundation
3 // Copyright (C) 2015 Intel Corporation 3 // Copyright (C) 2015 Intel Corporation
4 // 4 //
5 // Permission is hereby granted, free of charge, to any person obtaining 5 // Permission is hereby granted, free of charge, to any person obtaining
6 // a copy of this software and associated documentation files (the "Software"), 6 // a copy of this software and associated documentation files (the "Software"),
7 // to deal in the Software without restriction, including without limitation 7 // to deal in the Software without restriction, including without limitation
8 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 // and/or sell copies of the Software, and to permit persons to whom the Software 9 // and/or sell copies of the Software, and to permit persons to whom the Software
10 // is furnished to do so, subject to the following conditions: 10 // is furnished to do so, subject to the following conditions:
11 // 11 //
12 // The above copyright notice and this permission notice shall be included in 12 // The above copyright notice and this permission notice shall be included in
13 // all copies or substantial portions of the Software. 13 // all copies or substantial portions of the Software.
14 // 14 //
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
17 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 22
23 #include <emmintrin.h> 23 #include <emmintrin.h>
24 24
25 #include "qcmsint.h" 25 #include "qcmsint.h"
26 26
27 /* pre-shuffled: just load these into XMM reg instead of load-scalar/shufps sequence */ 27 /* pre-shuffled: just load these into XMM reg instead of load-scalar/shufps sequence */
28 #define FLOATSCALE (float)(PRECACHE_OUTPUT_SIZE) 28 #define FLOATSCALE (float)(PRECACHE_OUTPUT_SIZE - 1)
29 #define CLAMPMAXVAL ( ((float) (PRECACHE_OUTPUT_SIZE - 1)) / PRECACHE_OUTPUT_SIZE ) 29 #define CLAMPMAXVAL 1.0f
30
30 static const ALIGN float floatScaleX4[4] = 31 static const ALIGN float floatScaleX4[4] =
31 { FLOATSCALE, FLOATSCALE, FLOATSCALE, FLOATSCALE}; 32 { FLOATSCALE, FLOATSCALE, FLOATSCALE, FLOATSCALE};
32 static const ALIGN float clampMaxValueX4[4] = 33 static const ALIGN float clampMaxValueX4[4] =
33 { CLAMPMAXVAL, CLAMPMAXVAL, CLAMPMAXVAL, CLAMPMAXVAL}; 34 { CLAMPMAXVAL, CLAMPMAXVAL, CLAMPMAXVAL, CLAMPMAXVAL};
34 35
35 void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform, 36 void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
36 unsigned char *src, 37 unsigned char *src,
37 unsigned char *dest, 38 unsigned char *dest,
38 size_t length, 39 size_t length,
39 qcms_format_type output_format) 40 qcms_format_type output_format)
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 vec_r = _mm_shuffle_ps(vec_r, vec_r, 0); 97 vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
97 vec_g = _mm_shuffle_ps(vec_g, vec_g, 0); 98 vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
98 vec_b = _mm_shuffle_ps(vec_b, vec_b, 0); 99 vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
99 100
100 /* gamma * matrix */ 101 /* gamma * matrix */
101 vec_r = _mm_mul_ps(vec_r, mat0); 102 vec_r = _mm_mul_ps(vec_r, mat0);
102 vec_g = _mm_mul_ps(vec_g, mat1); 103 vec_g = _mm_mul_ps(vec_g, mat1);
103 vec_b = _mm_mul_ps(vec_b, mat2); 104 vec_b = _mm_mul_ps(vec_b, mat2);
104 105
105 /* crunch, crunch, crunch */ 106 /* crunch, crunch, crunch */
106 vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b)); 107 vec_r = _mm_add_ps(vec_g, _mm_add_ps(vec_r, vec_b));
107 vec_r = _mm_max_ps(min, vec_r); 108 vec_r = _mm_max_ps(min, vec_r);
108 vec_r = _mm_min_ps(max, vec_r); 109 vec_r = _mm_min_ps(max, vec_r);
109 result = _mm_mul_ps(vec_r, scale); 110 result = _mm_mul_ps(vec_r, scale);
110 111
111 /* store calc'd output tables indices */ 112 /* store calc'd output tables indices */
112 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result)); 113 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
113 114
114 /* load for next loop while store completes */ 115 /* load for next loop while store completes */
115 vec_r = _mm_load_ss(&igtbl_r[src[0]]); 116 vec_r = _mm_load_ss(&igtbl_r[src[0]]);
116 vec_g = _mm_load_ss(&igtbl_g[src[1]]); 117 vec_g = _mm_load_ss(&igtbl_g[src[1]]);
(...skipping 10 matching lines...) Expand all
127 /* handle final (maybe only) pixel */ 128 /* handle final (maybe only) pixel */
128 129
129 vec_r = _mm_shuffle_ps(vec_r, vec_r, 0); 130 vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
130 vec_g = _mm_shuffle_ps(vec_g, vec_g, 0); 131 vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
131 vec_b = _mm_shuffle_ps(vec_b, vec_b, 0); 132 vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
132 133
133 vec_r = _mm_mul_ps(vec_r, mat0); 134 vec_r = _mm_mul_ps(vec_r, mat0);
134 vec_g = _mm_mul_ps(vec_g, mat1); 135 vec_g = _mm_mul_ps(vec_g, mat1);
135 vec_b = _mm_mul_ps(vec_b, mat2); 136 vec_b = _mm_mul_ps(vec_b, mat2);
136 137
137 vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b)); 138 vec_r = _mm_add_ps(vec_g, _mm_add_ps(vec_r, vec_b));
138 vec_r = _mm_max_ps(min, vec_r); 139 vec_r = _mm_max_ps(min, vec_r);
139 vec_r = _mm_min_ps(max, vec_r); 140 vec_r = _mm_min_ps(max, vec_r);
140 result = _mm_mul_ps(vec_r, scale); 141 result = _mm_mul_ps(vec_r, scale);
141 142
142 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result)); 143 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
143 144
144 dest[r_out] = otdata_r[output[0]]; 145 dest[r_out] = otdata_r[output[0]];
145 dest[1] = otdata_g[output[1]]; 146 dest[1] = otdata_g[output[1]];
146 dest[b_out] = otdata_b[output[2]]; 147 dest[b_out] = otdata_b[output[2]];
147 } 148 }
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
216 /* gamma * matrix */ 217 /* gamma * matrix */
217 vec_r = _mm_mul_ps(vec_r, mat0); 218 vec_r = _mm_mul_ps(vec_r, mat0);
218 vec_g = _mm_mul_ps(vec_g, mat1); 219 vec_g = _mm_mul_ps(vec_g, mat1);
219 vec_b = _mm_mul_ps(vec_b, mat2); 220 vec_b = _mm_mul_ps(vec_b, mat2);
220 221
221 /* store alpha for this pixel; load alpha for next */ 222 /* store alpha for this pixel; load alpha for next */
222 dest[3] = alpha; 223 dest[3] = alpha;
223 alpha = src[3]; 224 alpha = src[3];
224 225
225 /* crunch, crunch, crunch */ 226 /* crunch, crunch, crunch */
226 vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b)); 227 vec_r = _mm_add_ps(vec_g, _mm_add_ps(vec_r, vec_b));
227 vec_r = _mm_max_ps(min, vec_r); 228 vec_r = _mm_max_ps(min, vec_r);
228 vec_r = _mm_min_ps(max, vec_r); 229 vec_r = _mm_min_ps(max, vec_r);
229 result = _mm_mul_ps(vec_r, scale); 230 result = _mm_mul_ps(vec_r, scale);
230 231
231 /* store calc'd output tables indices */ 232 /* store calc'd output tables indices */
232 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result)); 233 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
233 234
234 /* load gamma values for next loop while store completes */ 235 /* load gamma values for next loop while store completes */
235 vec_r = _mm_load_ss(&igtbl_r[src[0]]); 236 vec_r = _mm_load_ss(&igtbl_r[src[0]]);
236 vec_g = _mm_load_ss(&igtbl_g[src[1]]); 237 vec_g = _mm_load_ss(&igtbl_g[src[1]]);
(...skipping 12 matching lines...) Expand all
249 vec_r = _mm_shuffle_ps(vec_r, vec_r, 0); 250 vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
250 vec_g = _mm_shuffle_ps(vec_g, vec_g, 0); 251 vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
251 vec_b = _mm_shuffle_ps(vec_b, vec_b, 0); 252 vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
252 253
253 vec_r = _mm_mul_ps(vec_r, mat0); 254 vec_r = _mm_mul_ps(vec_r, mat0);
254 vec_g = _mm_mul_ps(vec_g, mat1); 255 vec_g = _mm_mul_ps(vec_g, mat1);
255 vec_b = _mm_mul_ps(vec_b, mat2); 256 vec_b = _mm_mul_ps(vec_b, mat2);
256 257
257 dest[3] = alpha; 258 dest[3] = alpha;
258 259
259 vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b)); 260 vec_r = _mm_add_ps(vec_g, _mm_add_ps(vec_r, vec_b));
260 vec_r = _mm_max_ps(min, vec_r); 261 vec_r = _mm_max_ps(min, vec_r);
261 vec_r = _mm_min_ps(max, vec_r); 262 vec_r = _mm_min_ps(max, vec_r);
262 result = _mm_mul_ps(vec_r, scale); 263 result = _mm_mul_ps(vec_r, scale);
263 264
264 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result)); 265 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
265 266
266 dest[r_out] = otdata_r[output[0]]; 267 dest[r_out] = otdata_r[output[0]];
267 dest[1] = otdata_g[output[1]]; 268 dest[1] = otdata_g[output[1]];
268 dest[b_out] = otdata_b[output[2]]; 269 dest[b_out] = otdata_b[output[2]];
269 } 270 }
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
448 result = __mm_swizzle_epi32(result, bgra); 449 result = __mm_swizzle_epi32(result, bgra);
449 result = _mm_packus_epi16(result, result); 450 result = _mm_packus_epi16(result, result);
450 result = _mm_packus_epi16(result, result); 451 result = _mm_packus_epi16(result, result);
451 452
452 // store into uint32_t* pixel destination 453 // store into uint32_t* pixel destination
453 454
454 *(uint32_t *)dest = _mm_cvtsi128_si32(result); 455 *(uint32_t *)dest = _mm_cvtsi128_si32(result);
455 dest += 4; 456 dest += 4;
456 } 457 }
457 } 458 }
OLDNEW
« no previous file with comments | « third_party/qcms/README.chromium ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698