OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkColorXform_opts_DEFINED | 8 #ifndef SkColorXform_opts_DEFINED |
9 #define SkColorXform_opts_DEFINED | 9 #define SkColorXform_opts_DEFINED |
10 | 10 |
11 #include "SkNx.h" | 11 #include "SkNx.h" |
12 #include "SkColorPriv.h" | 12 #include "SkColorPriv.h" |
13 #include "SkHalf.h" | 13 #include "SkHalf.h" |
14 #include "SkSRGB.h" | 14 #include "SkSRGB.h" |
15 #include "SkTemplates.h" | 15 #include "SkTemplates.h" |
16 | 16 |
17 namespace SK_OPTS_NS { | 17 namespace SK_OPTS_NS { |
18 | 18 |
19 static Sk4i linear_to_2dot2(const Sk4f& x) { | 19 // Strange that we need a wrapper on SkNx_cast to use as a function ptr. |
| 20 static Sk4i Sk4f_trunc(const Sk4f& x) { |
| 21 return SkNx_cast<int>(x); |
| 22 } |
| 23 |
| 24 static Sk4f linear_to_2dot2(const Sk4f& x) { |
20 // x^(29/64) is a very good approximation of the true value, x^(1/2.2). | 25 // x^(29/64) is a very good approximation of the true value, x^(1/2.2). |
21 auto x2 = x.rsqrt(), // x^(-1/2) | 26 auto x2 = x.rsqrt(), // x^(-1/2) |
22 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) | 27 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) |
23 x64 = x32.rsqrt(); // x^(+1/64) | 28 x64 = x32.rsqrt(); // x^(+1/64) |
24 | 29 |
25 // 29 = 32 - 2 - 1 | 30 // 29 = 32 - 2 - 1 |
26 return Sk4f_round(sk_clamp_0_255(255.0f * x2.invert() * x32 * x64.invert()))
; | 31 return 255.0f * x2.invert() * x32 * x64.invert(); |
27 } | 32 } |
28 | 33 |
29 enum DstGamma { | 34 enum DstGamma { |
30 // 8888 | 35 // 8888 |
31 kSRGB_DstGamma, | 36 kSRGB_DstGamma, |
32 k2Dot2_DstGamma, | 37 k2Dot2_DstGamma, |
33 kTable_DstGamma, | 38 kTable_DstGamma, |
34 | 39 |
35 // F16 | 40 // F16 |
36 kLinear_DstGamma, | 41 kLinear_DstGamma, |
37 }; | 42 }; |
38 | 43 |
39 template <DstGamma kDstGamma> | 44 template <DstGamma kDstGamma, bool kSwapRB> |
40 static void color_xform_RGB1(void* dst, const uint32_t* src, int len, | 45 static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
41 const float* const srcTables[3], const float matrix
[16], | 46 const float* const srcTables[3], const float matrix
[16], |
42 const uint8_t* const dstTables[3]) { | 47 const uint8_t* const dstTables[3]) { |
43 Sk4f rXgXbX = Sk4f::Load(matrix + 0), | 48 int kRShift = 0; |
44 rYgYbY = Sk4f::Load(matrix + 4), | 49 int kGShift = 8; |
45 rZgZbZ = Sk4f::Load(matrix + 8); | 50 int kBShift = 16; |
| 51 int kAShift = 24; |
| 52 if (kSwapRB) { |
| 53 kBShift = 0; |
| 54 kRShift = 16; |
| 55 } |
| 56 |
| 57 Sk4f rXgXbX = Sk4f::Load(matrix + 0), |
| 58 rYgYbY = Sk4f::Load(matrix + 4), |
| 59 rZgZbZ = Sk4f::Load(matrix + 8), |
| 60 rTgTbT = Sk4f::Load(matrix + 12); |
46 | 61 |
47 if (len >= 4) { | 62 if (len >= 4) { |
48 Sk4f reds, greens, blues; | 63 Sk4f reds, greens, blues; |
49 auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] { | 64 auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] { |
50 reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF], | 65 reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF], |
51 srcTables[0][(src[1] >> 0) & 0xFF], | 66 srcTables[0][(src[1] >> 0) & 0xFF], |
52 srcTables[0][(src[2] >> 0) & 0xFF], | 67 srcTables[0][(src[2] >> 0) & 0xFF], |
53 srcTables[0][(src[3] >> 0) & 0xFF]}; | 68 srcTables[0][(src[3] >> 0) & 0xFF]}; |
54 greens = Sk4f{srcTables[1][(src[0] >> 8) & 0xFF], | 69 greens = Sk4f{srcTables[1][(src[0] >> 8) & 0xFF], |
55 srcTables[1][(src[1] >> 8) & 0xFF], | 70 srcTables[1][(src[1] >> 8) & 0xFF], |
56 srcTables[1][(src[2] >> 8) & 0xFF], | 71 srcTables[1][(src[2] >> 8) & 0xFF], |
57 srcTables[1][(src[3] >> 8) & 0xFF]}; | 72 srcTables[1][(src[3] >> 8) & 0xFF]}; |
58 blues = Sk4f{srcTables[2][(src[0] >> 16) & 0xFF], | 73 blues = Sk4f{srcTables[2][(src[0] >> 16) & 0xFF], |
59 srcTables[2][(src[1] >> 16) & 0xFF], | 74 srcTables[2][(src[1] >> 16) & 0xFF], |
60 srcTables[2][(src[2] >> 16) & 0xFF], | 75 srcTables[2][(src[2] >> 16) & 0xFF], |
61 srcTables[2][(src[3] >> 16) & 0xFF]}; | 76 srcTables[2][(src[3] >> 16) & 0xFF]}; |
62 src += 4; | 77 src += 4; |
63 len -= 4; | 78 len -= 4; |
64 }; | 79 }; |
65 | 80 |
66 Sk4f dstReds, dstGreens, dstBlues; | 81 Sk4f dstReds, dstGreens, dstBlues; |
67 auto transform_4 = [&reds, &greens, &blues, &dstReds, &dstGreens, &dstBl
ues, &rXgXbX, | 82 auto transform_4 = [&reds, &greens, &blues, &dstReds, &dstGreens, &dstBl
ues, &rXgXbX, |
68 &rYgYbY, &rZgZbZ] { | 83 &rYgYbY, &rZgZbZ, &rTgTbT] { |
69 dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues; | 84 dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues + rT
gTbT[0]; |
70 dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues; | 85 dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues + rT
gTbT[1]; |
71 dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues; | 86 dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues + rT
gTbT[2]; |
72 }; | 87 }; |
73 | 88 |
74 auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables] { | 89 auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables, kRShi
ft, kGShift, |
| 90 kBShift, kAShift] { |
75 if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { | 91 if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { |
76 Sk4i (*linear_to_curve)(const Sk4f&) = | 92 Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGa
mma) ? |
77 (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : line
ar_to_2dot2; | 93 sk_linear_to_srgb_needs_trunc : linear_to_2dot2; |
| 94 Sk4i (*float_to_int)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma
) ? |
| 95 Sk4f_trunc : Sk4f_round; |
78 | 96 |
79 auto reds = linear_to_curve(dstReds); | 97 dstReds = linear_to_curve(dstReds); |
80 auto greens = linear_to_curve(dstGreens); | 98 dstGreens = linear_to_curve(dstGreens); |
81 auto blues = linear_to_curve(dstBlues); | 99 dstBlues = linear_to_curve(dstBlues); |
82 | 100 |
83 auto rgba = (reds << SK_R32_SHIFT) | 101 dstReds = sk_clamp_0_255(dstReds); |
84 | (greens << SK_G32_SHIFT) | 102 dstGreens = sk_clamp_0_255(dstGreens); |
85 | (blues << SK_B32_SHIFT) | 103 dstBlues = sk_clamp_0_255(dstBlues); |
86 | (Sk4i{0xFF} << SK_A32_SHIFT); | 104 |
| 105 auto rgba = (float_to_int(dstReds) << kRShift) |
| 106 | (float_to_int(dstGreens) << kGShift) |
| 107 | (float_to_int(dstBlues) << kBShift) |
| 108 | (Sk4i{0xFF} << kAShift); |
87 rgba.store((uint32_t*) dst); | 109 rgba.store((uint32_t*) dst); |
88 | 110 |
89 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); | 111 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); |
90 } else if (kTable_DstGamma == kDstGamma) { | 112 } else if (kTable_DstGamma == kDstGamma) { |
91 Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0
f), 1023.0f); | 113 Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0
f), 1023.0f); |
92 Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0
f), 1023.0f); | 114 Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0
f), 1023.0f); |
93 Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0
f), 1023.0f); | 115 Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0
f), 1023.0f); |
94 | 116 |
95 Sk4i indicesReds = Sk4f_round(scaledReds); | 117 Sk4i indicesReds = Sk4f_round(scaledReds); |
96 Sk4i indicesGreens = Sk4f_round(scaledGreens); | 118 Sk4i indicesGreens = Sk4f_round(scaledGreens); |
97 Sk4i indicesBlues = Sk4f_round(scaledBlues); | 119 Sk4i indicesBlues = Sk4f_round(scaledBlues); |
98 | 120 |
99 uint32_t* dst32 = (uint32_t*) dst; | 121 uint32_t* dst32 = (uint32_t*) dst; |
100 dst32[0] = dstTables[0][indicesReds [0]] << SK_R32_SHIFT | 122 dst32[0] = dstTables[0][indicesReds [0]] << kRShift |
101 | dstTables[1][indicesGreens[0]] << SK_G32_SHIFT | 123 | dstTables[1][indicesGreens[0]] << kGShift |
102 | dstTables[2][indicesBlues [0]] << SK_B32_SHIFT | 124 | dstTables[2][indicesBlues [0]] << kBShift |
103 | 0xFF << SK_A32_SHIFT; | 125 | 0xFF << kAShift; |
104 dst32[1] = dstTables[0][indicesReds [1]] << SK_R32_SHIFT | 126 dst32[1] = dstTables[0][indicesReds [1]] << kRShift |
105 | dstTables[1][indicesGreens[1]] << SK_G32_SHIFT | 127 | dstTables[1][indicesGreens[1]] << kGShift |
106 | dstTables[2][indicesBlues [1]] << SK_B32_SHIFT | 128 | dstTables[2][indicesBlues [1]] << kBShift |
107 | 0xFF << SK_A32_SHIFT; | 129 | 0xFF << kAShift; |
108 dst32[2] = dstTables[0][indicesReds [2]] << SK_R32_SHIFT | 130 dst32[2] = dstTables[0][indicesReds [2]] << kRShift |
109 | dstTables[1][indicesGreens[2]] << SK_G32_SHIFT | 131 | dstTables[1][indicesGreens[2]] << kGShift |
110 | dstTables[2][indicesBlues [2]] << SK_B32_SHIFT | 132 | dstTables[2][indicesBlues [2]] << kBShift |
111 | 0xFF << SK_A32_SHIFT; | 133 | 0xFF << kAShift; |
112 dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT | 134 dst32[3] = dstTables[0][indicesReds [3]] << kRShift |
113 | dstTables[1][indicesGreens[3]] << SK_G32_SHIFT | 135 | dstTables[1][indicesGreens[3]] << kGShift |
114 | dstTables[2][indicesBlues [3]] << SK_B32_SHIFT | 136 | dstTables[2][indicesBlues [3]] << kBShift |
115 | 0xFF << SK_A32_SHIFT; | 137 | 0xFF << kAShift; |
116 | 138 |
117 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); | 139 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); |
118 } else { | 140 } else { |
119 Sk4h_store4(dst, SkFloatToHalf_finite(dstReds), | 141 Sk4h_store4(dst, SkFloatToHalf_finite(dstReds), |
120 SkFloatToHalf_finite(dstGreens), | 142 SkFloatToHalf_finite(dstGreens), |
121 SkFloatToHalf_finite(dstBlues), | 143 SkFloatToHalf_finite(dstBlues), |
122 SK_Half1); | 144 SK_Half1); |
123 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); | 145 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); |
124 } | 146 } |
125 }; | 147 }; |
126 | 148 |
127 load_next_4(); | 149 load_next_4(); |
128 | 150 |
129 while (len >= 4) { | 151 while (len >= 4) { |
130 transform_4(); | 152 transform_4(); |
131 load_next_4(); | 153 load_next_4(); |
132 store_4(); | 154 store_4(); |
133 } | 155 } |
134 | 156 |
135 transform_4(); | 157 transform_4(); |
136 store_4(); | 158 store_4(); |
137 } | 159 } |
138 | 160 |
139 while (len > 0) { | 161 while (len > 0) { |
140 // Splat r,g,b across a register each. | 162 // Splat r,g,b across a register each. |
141 auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]}, | 163 auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]}, |
142 g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]}, | 164 g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]}, |
143 b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}; | 165 b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}; |
144 | 166 |
145 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; | 167 auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b + rTgTbT; |
146 | 168 |
147 if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { | 169 if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { |
148 Sk4i (*linear_to_curve)(const Sk4f&) = | 170 Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma)
? |
149 (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_t
o_2dot2; | 171 sk_linear_to_srgb_needs_trunc : linear_to_2dot2; |
| 172 Sk4i (*float_to_int)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? |
| 173 Sk4f_trunc : Sk4f_round; |
150 | 174 |
151 auto pixel = linear_to_curve(dstPixel); | 175 dstPixel = sk_clamp_0_255(linear_to_curve(dstPixel)); |
152 | 176 |
153 uint32_t rgba; | 177 uint32_t rgba; |
154 SkNx_cast<uint8_t>(pixel).store(&rgba); | 178 SkNx_cast<uint8_t>(float_to_int(dstPixel)).store(&rgba); |
155 rgba |= 0xFF000000; | 179 rgba |= 0xFF000000; |
156 *((uint32_t*) dst) = SkSwizzle_RGBA_to_PMColor(rgba); | 180 if (kSwapRB) { |
| 181 *((uint32_t*) dst) = SkSwizzle_RB(rgba); |
| 182 } else { |
| 183 *((uint32_t*) dst) = rgba; |
| 184 } |
157 dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); | 185 dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); |
158 } else if (kTable_DstGamma == kDstGamma) { | 186 } else if (kTable_DstGamma == kDstGamma) { |
159 Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 10
23.0f); | 187 Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 10
23.0f); |
160 | 188 |
161 Sk4i indices = Sk4f_round(scaledPixel); | 189 Sk4i indices = Sk4f_round(scaledPixel); |
162 | 190 |
163 *((uint32_t*) dst) = dstTables[0][indices[0]] << SK_R32_SHIFT | 191 *((uint32_t*) dst) = dstTables[0][indices[0]] << kRShift |
164 | dstTables[1][indices[1]] << SK_G32_SHIFT | 192 | dstTables[1][indices[1]] << kGShift |
165 | dstTables[2][indices[2]] << SK_B32_SHIFT | 193 | dstTables[2][indices[2]] << kBShift |
166 | 0xFF << SK_A32_SHIFT; | 194 | 0xFF << kAShift; |
167 | 195 |
168 dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); | 196 dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); |
169 } else { | 197 } else { |
170 uint64_t rgba; | 198 uint64_t rgba; |
171 SkFloatToHalf_finite(dstPixel).store(&rgba); | 199 SkFloatToHalf_finite(dstPixel).store(&rgba); |
172 rgba |= static_cast<uint64_t>(SK_Half1) << 48; | 200 rgba |= static_cast<uint64_t>(SK_Half1) << 48; |
173 *((uint64_t*) dst) = rgba; | 201 *((uint64_t*) dst) = rgba; |
174 dst = SkTAddOffset<void>(dst, sizeof(uint64_t)); | 202 dst = SkTAddOffset<void>(dst, sizeof(uint64_t)); |
175 } | 203 } |
176 | 204 |
177 src += 1; | 205 src += 1; |
178 len -= 1; | 206 len -= 1; |
179 } | 207 } |
180 } | 208 } |
181 | 209 |
182 static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int le
n, | 210 static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int le
n, |
183 const float* const srcTables[3], const flo
at matrix[16]) { | 211 const float* const srcTables[3], const flo
at matrix[16]) { |
184 color_xform_RGB1<k2Dot2_DstGamma>(dst, src, len, srcTables, matrix, nullptr)
; | 212 color_xform_RGB1<k2Dot2_DstGamma, false>(dst, src, len, srcTables, matrix, n
ullptr); |
185 } | 213 } |
186 | 214 |
187 static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len
, | 215 static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len
, |
188 const float* const srcTables[3], const floa
t matrix[16]) { | 216 const float* const srcTables[3], const floa
t matrix[16]) { |
189 color_xform_RGB1<kSRGB_DstGamma>(dst, src, len, srcTables, matrix, nullptr); | 217 color_xform_RGB1<kSRGB_DstGamma, false>(dst, src, len, srcTables, matrix, nu
llptr); |
190 } | 218 } |
191 | 219 |
192 static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int le
n, | 220 static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int le
n, |
193 const float* const srcTables[3], const flo
at matrix[16], | 221 const float* const srcTables[3], const flo
at matrix[16], |
194 const uint8_t* const dstTables[3]) { | 222 const uint8_t* const dstTables[3]) { |
195 color_xform_RGB1<kTable_DstGamma>(dst, src, len, srcTables, matrix, dstTable
s); | 223 color_xform_RGB1<kTable_DstGamma, false>(dst, src, len, srcTables, matrix, d
stTables); |
196 } | 224 } |
197 | 225 |
198 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int l
en, | 226 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int l
en, |
199 const float* const srcTables[3], const fl
oat matrix[16]) { | 227 const float* const srcTables[3], const fl
oat matrix[16]) { |
200 color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr
); | 228 color_xform_RGB1<kLinear_DstGamma, false>(dst, src, len, srcTables, matrix,
nullptr); |
| 229 } |
| 230 |
| 231 static void color_xform_RGB1_to_2dot2_swaprb(uint32_t* dst, const uint32_t* src,
int len, |
| 232 const float* const srcTables[3], |
| 233 const float matrix[16]) { |
| 234 color_xform_RGB1<k2Dot2_DstGamma, true>(dst, src, len, srcTables, matrix, nu
llptr); |
| 235 } |
| 236 |
| 237 static void color_xform_RGB1_to_srgb_swaprb(uint32_t* dst, const uint32_t* src,
int len, |
| 238 const float* const srcTables[3], |
| 239 const float matrix[16]) { |
| 240 color_xform_RGB1<kSRGB_DstGamma, true>(dst, src, len, srcTables, matrix, nul
lptr); |
| 241 } |
| 242 |
| 243 static void color_xform_RGB1_to_table_swaprb(uint32_t* dst, const uint32_t* src,
int len, |
| 244 const float* const srcTables[3], |
| 245 const float matrix[16], |
| 246 const uint8_t* const dstTables[3])
{ |
| 247 color_xform_RGB1<kTable_DstGamma, true>(dst, src, len, srcTables, matrix, ds
tTables); |
201 } | 248 } |
202 | 249 |
203 } // namespace SK_OPTS_NS | 250 } // namespace SK_OPTS_NS |
204 | 251 |
205 #endif // SkColorXform_opts_DEFINED | 252 #endif // SkColorXform_opts_DEFINED |
OLD | NEW |