Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(42)

Unified Diff: src/opts/SkColorXform_opts.h

Issue 2060823003: Implement fast, correct gamma conversion for color xforms (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Win test Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/core/SkOpts.cpp ('k') | src/opts/SkOpts_sse41.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/SkColorXform_opts.h
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
index 3fc620f014536896cc2527e0d7480e445ad979f8..da0c9010f23c7d58480f14240e3e2bb87f06b1aa 100644
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -12,151 +12,236 @@
namespace SK_OPTS_NS {
-static uint8_t clamp_float_to_byte(float v) {
- if (v >= 254.5f) {
- return 255;
- } else if (v < 0.5f) {
- return 0;
- } else {
- return (uint8_t) (v + 0.5f);
- }
-}
+extern const float linear_from_srgb[256] = { // Look-up table: 8-bit sRGB-encoded channel value (the index) -> linear float, 0.0f at index 0 up to 1.0f at index 255.
+ 0.000000000000000000f, 0.000303526983548838f, 0.000607053967097675f, 0.000910580950646513f,
+ 0.001214107934195350f, 0.001517634917744190f, 0.001821161901293030f, 0.002124688884841860f,
+ 0.002428215868390700f, 0.002731742851939540f, 0.003034518678424960f, 0.003346535763899160f,
+ 0.003676507324047440f, 0.004024717018496310f, 0.004391442037410290f, 0.004776953480693730f,
+ 0.005181516702338390f, 0.005605391624202720f, 0.006048833022857060f, 0.006512090792594470f,
+ 0.006995410187265390f, 0.007499032043226180f, 0.008023192985384990f, 0.008568125618069310f,
+ 0.009134058702220790f, 0.009721217320237850f, 0.010329823029626900f, 0.010960094006488200f,
+ 0.011612245179743900f, 0.012286488356915900f, 0.012983032342173000f, 0.013702083047289700f,
+ 0.014443843596092500f, 0.015208514422912700f, 0.015996293365509600f, 0.016807375752887400f,
+ 0.017641954488384100f, 0.018500220128379700f, 0.019382360956935700f, 0.020288563056652400f,
+ 0.021219010376003600f, 0.022173884793387400f, 0.023153366178110400f, 0.024157632448504800f,
+ 0.025186859627361600f, 0.026241221894849900f, 0.027320891639074900f, 0.028426039504420800f,
+ 0.029556834437808800f, 0.030713443732993600f, 0.031896033073011500f, 0.033104766570885100f,
+ 0.034339806808682200f, 0.035601314875020300f, 0.036889450401100000f, 0.038204371595346500f,
+ 0.039546235276732800f, 0.040915196906853200f, 0.042311410620809700f, 0.043735029256973500f,
+ 0.045186204385675500f, 0.046665086336880100f, 0.048171824226889400f, 0.049706565984127200f,
+ 0.051269458374043200f, 0.052860647023180200f, 0.054480276442442400f, 0.056128490049600100f,
+ 0.057805430191067200f, 0.059511238162981200f, 0.061246054231617600f, 0.063010017653167700f,
+ 0.064803266692905800f, 0.066625938643772900f, 0.068478169844400200f, 0.070360095696595900f,
+ 0.072271850682317500f, 0.074213568380149600f, 0.076185381481307900f, 0.078187421805186300f,
+ 0.080219820314468300f, 0.082282707129814800f, 0.084376211544148800f, 0.086500462036549800f,
+ 0.088655586285772900f, 0.090841711183407700f, 0.093058962846687500f, 0.095307466630964700f,
+ 0.097587347141862500f, 0.099898728247113900f, 0.102241733088101000f, 0.104616484091104000f,
+ 0.107023102978268000f, 0.109461710778299000f, 0.111932427836906000f, 0.114435373826974000f,
+ 0.116970667758511000f, 0.119538427988346000f, 0.122138772229602000f, 0.124771817560950000f,
+ 0.127437680435647000f, 0.130136476690364000f, 0.132868321553818000f, 0.135633329655206000f,
+ 0.138431615032452000f, 0.141263291140272000f, 0.144128470858058000f, 0.147027266497595000f,
+ 0.149959789810609000f, 0.152926151996150000f, 0.155926463707827000f, 0.158960835060880000f,
+ 0.162029375639111000f, 0.165132194501668000f, 0.168269400189691000f, 0.171441100732823000f,
+ 0.174647403655585000f, 0.177888415983629000f, 0.181164244249860000f, 0.184474994500441000f,
+ 0.187820772300678000f, 0.191201682740791000f, 0.194617830441576000f, 0.198069319559949000f,
+ 0.201556253794397000f, 0.205078736390317000f, 0.208636870145256000f, 0.212230757414055000f,
+ 0.215860500113899000f, 0.219526199729269000f, 0.223227957316809000f, 0.226965873510098000f,
+ 0.230740048524349000f, 0.234550582161005000f, 0.238397573812271000f, 0.242281122465555000f,
+ 0.246201326707835000f, 0.250158284729953000f, 0.254152094330827000f, 0.258182852921596000f,
+ 0.262250657529696000f, 0.266355604802862000f, 0.270497791013066000f, 0.274677312060385000f,
+ 0.278894263476810000f, 0.283148740429992000f, 0.287440837726918000f, 0.291770649817536000f,
+ 0.296138270798321000f, 0.300543794415777000f, 0.304987314069886000f, 0.309468922817509000f,
+ 0.313988713375718000f, 0.318546778125092000f, 0.323143209112951000f, 0.327778098056542000f,
+ 0.332451536346179000f, 0.337163615048330000f, 0.341914424908661000f, 0.346704056355030000f,
+ 0.351532599500439000f, 0.356400144145944000f, 0.361306779783510000f, 0.366252595598840000f,
+ 0.371237680474149000f, 0.376262122990906000f, 0.381326011432530000f, 0.386429433787049000f,
+ 0.391572477749723000f, 0.396755230725627000f, 0.401977779832196000f, 0.407240211901737000f,
+ 0.412542613483904000f, 0.417885070848138000f, 0.423267669986072000f, 0.428690496613907000f,
+ 0.434153636174749000f, 0.439657173840919000f, 0.445201194516228000f, 0.450785782838223000f,
+ 0.456411023180405000f, 0.462076999654407000f, 0.467783796112159000f, 0.473531496148010000f,
+ 0.479320183100827000f, 0.485149940056070000f, 0.491020849847836000f, 0.496932995060870000f,
+ 0.502886458032569000f, 0.508881320854934000f, 0.514917665376521000f, 0.520995573204354000f,
+ 0.527115125705813000f, 0.533276404010505000f, 0.539479489012107000f, 0.545724461370187000f,
+ 0.552011401512000000f, 0.558340389634268000f, 0.564711505704929000f, 0.571124829464873000f,
+ 0.577580440429651000f, 0.584078417891164000f, 0.590618840919337000f, 0.597201788363763000f,
+ 0.603827338855338000f, 0.610495570807865000f, 0.617206562419651000f, 0.623960391675076000f,
+ 0.630757136346147000f, 0.637596873994033000f, 0.644479681970582000f, 0.651405637419824000f,
+ 0.658374817279448000f, 0.665387298282272000f, 0.672443156957688000f, 0.679542469633094000f,
+ 0.686685312435314000f, 0.693871761291990000f, 0.701101891932973000f, 0.708375779891687000f,
+ 0.715693500506481000f, 0.723055128921969000f, 0.730460740090354000f, 0.737910408772731000f,
+ 0.745404209540387000f, 0.752942216776078000f, 0.760524504675292000f, 0.768151147247507000f,
+ 0.775822218317423000f, 0.783537791526194000f, 0.791297940332630000f, 0.799102738014409000f,
+ 0.806952257669252000f, 0.814846572216101000f, 0.822785754396284000f, 0.830769876774655000f,
+ 0.838799011740740000f, 0.846873231509858000f, 0.854992608124234000f, 0.863157213454102000f,
+ 0.871367119198797000f, 0.879622396887832000f, 0.887923117881966000f, 0.896269353374266000f,
+ 0.904661174391149000f, 0.913098651793419000f, 0.921581856277295000f, 0.930110858375424000f,
+ 0.938685728457888000f, 0.947306536733200000f, 0.955973353249286000f, 0.964686247894465000f,
+ 0.973445290398413000f, 0.982250550333117000f, 0.991102097113830000f, 1.000000000000000000f,
+}; // Supplied to color_xform_RGB1<> as its linear_from_curve template argument.
-static void color_xform_2Dot2_RGBA_to_8888_portable(uint32_t* dst, const uint32_t* src, int len,
- const float matrix[16]) {
- while (len-- > 0) {
- float srcFloats[3];
- srcFloats[0] = (float) ((*src >> 0) & 0xFF);
- srcFloats[1] = (float) ((*src >> 8) & 0xFF);
- srcFloats[2] = (float) ((*src >> 16) & 0xFF);
+extern const float linear_from_2dot2[256] = { // Look-up table: 8-bit gamma-2.2-encoded channel value (the index) -> linear float, 0.0f at index 0 up to 1.0f at index 255.
+ 0.000000000000000000f, 0.000005077051900662f, 0.000023328004666099f, 0.000056921765712193f,
+ 0.000107187362341244f, 0.000175123977503027f, 0.000261543754548491f, 0.000367136269815943f,
+ 0.000492503787191433f, 0.000638182842167022f, 0.000804658499513058f, 0.000992374304074325f,
+ 0.001201739522438400f, 0.001433134589671860f, 0.001686915316789280f, 0.001963416213396470f,
+ 0.002262953160706430f, 0.002585825596234170f, 0.002932318323938360f, 0.003302703032003640f,
+ 0.003697239578900130f, 0.004116177093282750f, 0.004559754922526020f, 0.005028203456855540f,
+ 0.005521744850239660f, 0.006040593654849810f, 0.006584957382581690f, 0.007155037004573030f,
+ 0.007751027397660610f, 0.008373117745148580f, 0.009021491898012130f, 0.009696328701658230f,
+ 0.010397802292555300f, 0.011126082368383200f, 0.011881334434813700f, 0.012663720031582100f,
+ 0.013473396940142600f, 0.014310519374884100f, 0.015175238159625200f, 0.016067700890886900f,
+ 0.016988052089250000f, 0.017936433339950200f, 0.018912983423721500f, 0.019917838438785700f,
+ 0.020951131914781100f, 0.022012994919336500f, 0.023103556157921400f, 0.024222942067534200f,
+ 0.025371276904734600f, 0.026548682828472900f, 0.027755279978126000f, 0.028991186547107800f,
+ 0.030256518852388700f, 0.031551391400226400f, 0.032875916948383800f, 0.034230206565082000f,
+ 0.035614369684918800f, 0.037028514161960200f, 0.038472746320194600f, 0.039947171001525600f,
+ 0.041451891611462500f, 0.042987010162657100f, 0.044552627316421400f, 0.046148842422351000f,
+ 0.047775753556170600f, 0.049433457555908000f, 0.051122050056493400f, 0.052841625522879000f,
+ 0.054592277281760300f, 0.056374097551979800f, 0.058187177473685400f, 0.060031607136313200f,
+ 0.061907475605455800f, 0.063814870948677200f, 0.065753880260330100f, 0.067724589685424300f,
+ 0.069727084442598800f, 0.071761448846239100f, 0.073827766327784600f, 0.075926119456264800f,
+ 0.078056589958101900f, 0.080219258736215100f, 0.082414205888459200f, 0.084641510725429500f,
+ 0.086901251787660300f, 0.089193506862247800f, 0.091518352998919500f, 0.093875866525577800f,
+ 0.096266123063339700f, 0.098689197541094500f, 0.101145164209600000f, 0.103634096655137000f,
+ 0.106156067812744000f, 0.108711149979039000f, 0.111299414824660000f, 0.113920933406333000f,
+ 0.116575776178572000f, 0.119264013005047000f, 0.121985713169619000f, 0.124740945387051000f,
+ 0.127529777813422000f, 0.130352278056244000f, 0.133208513184300000f, 0.136098549737202000f,
+ 0.139022453734703000f, 0.141980290685736000f, 0.144972125597231000f, 0.147998022982685000f,
+ 0.151058046870511000f, 0.154152260812165000f, 0.157280727890073000f, 0.160443510725344000f,
+ 0.163640671485290000f, 0.166872271890766000f, 0.170138373223312000f, 0.173439036332135000f,
+ 0.176774321640903000f, 0.180144289154390000f, 0.183548998464951000f, 0.186988508758844000f,
+ 0.190462878822409000f, 0.193972167048093000f, 0.197516431440340000f, 0.201095729621346000f,
+ 0.204710118836677000f, 0.208359655960767000f, 0.212044397502288000f, 0.215764399609395000f,
+ 0.219519718074868000f, 0.223310408341127000f, 0.227136525505149000f, 0.230998124323267000f,
+ 0.234895259215880000f, 0.238827984272048000f, 0.242796353254002000f, 0.246800419601550000f,
+ 0.250840236436400000f, 0.254915856566385000f, 0.259027332489606000f, 0.263174716398492000f,
+ 0.267358060183772000f, 0.271577415438375000f, 0.275832833461245000f, 0.280124365261085000f,
+ 0.284452061560024000f, 0.288815972797219000f, 0.293216149132375000f, 0.297652640449211000f,
+ 0.302125496358853000f, 0.306634766203158000f, 0.311180499057984000f, 0.315762743736397000f,
+ 0.320381548791810000f, 0.325036962521076000f, 0.329729032967515000f, 0.334457807923889000f,
+ 0.339223334935327000f, 0.344025661302187000f, 0.348864834082879000f, 0.353740900096629000f,
+ 0.358653905926199000f, 0.363603897920553000f, 0.368590922197487000f, 0.373615024646202000f,
+ 0.378676250929840000f, 0.383774646487975000f, 0.388910256539059000f, 0.394083126082829000f,
+ 0.399293299902674000f, 0.404540822567962000f, 0.409825738436323000f, 0.415148091655907000f,
+ 0.420507926167587000f, 0.425905285707146000f, 0.431340213807410000f, 0.436812753800359000f,
+ 0.442322948819202000f, 0.447870841800410000f, 0.453456475485731000f, 0.459079892424160000f,
+ 0.464741134973889000f, 0.470440245304218000f, 0.476177265397440000f, 0.481952237050698000f,
+ 0.487765201877811000f, 0.493616201311074000f, 0.499505276603030000f, 0.505432468828216000f,
+ 0.511397818884880000f, 0.517401367496673000f, 0.523443155214325000f, 0.529523222417277000f,
+ 0.535641609315311000f, 0.541798355950137000f, 0.547993502196972000f, 0.554227087766085000f,
+ 0.560499152204328000f, 0.566809734896638000f, 0.573158875067523000f, 0.579546611782525000f,
+ 0.585972983949661000f, 0.592438030320847000f, 0.598941789493296000f, 0.605484299910907000f,
+ 0.612065599865624000f, 0.618685727498780000f, 0.625344720802427000f, 0.632042617620641000f,
+ 0.638779455650817000f, 0.645555272444935000f, 0.652370105410821000f, 0.659223991813387000f,
+ 0.666116968775851000f, 0.673049073280942000f, 0.680020342172095000f, 0.687030812154625000f,
+ 0.694080519796882000f, 0.701169501531402000f, 0.708297793656032000f, 0.715465432335048000f,
+ 0.722672453600255000f, 0.729918893352071000f, 0.737204787360605000f, 0.744530171266715000f,
+ 0.751895080583051000f, 0.759299550695091000f, 0.766743616862161000f, 0.774227314218442000f,
+ 0.781750677773962000f, 0.789313742415586000f, 0.796916542907978000f, 0.804559113894567000f,
+ 0.812241489898490000f, 0.819963705323528000f, 0.827725794455034000f, 0.835527791460841000f,
+ 0.843369730392169000f, 0.851251645184515000f, 0.859173569658532000f, 0.867135537520905000f,
+ 0.875137582365205000f, 0.883179737672745000f, 0.891262036813419000f, 0.899384513046529000f,
+ 0.907547199521614000f, 0.915750129279253000f, 0.923993335251873000f, 0.932276850264543000f,
+ 0.940600707035753000f, 0.948964938178195000f, 0.957369576199527000f, 0.965814653503130000f,
+ 0.974300202388861000f, 0.982826255053791000f, 0.991392843592940000f, 1.000000000000000000f,
+}; // Supplied to color_xform_RGB1<> as its linear_from_curve template argument.
- // Convert to linear.
- // TODO (msarett):
- // We should use X^2.2 here instead of X^2. What is the impact on correctness?
- // We should be able to get closer to 2.2 at a small performance cost.
- srcFloats[0] = srcFloats[0] * srcFloats[0];
- srcFloats[1] = srcFloats[1] * srcFloats[1];
- srcFloats[2] = srcFloats[2] * srcFloats[2];
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- // Convert to dst gamut.
- float dstFloats[3];
- // TODO (msarett): matrix[12], matrix[13], and matrix[14] are almost always zero.
- // Should we have another optimized path that avoids the extra addition when they
- // are zero?
- dstFloats[0] = srcFloats[0] * matrix[0] + srcFloats[1] * matrix[4] +
- srcFloats[2] * matrix[8] + matrix[12];
- dstFloats[1] = srcFloats[0] * matrix[1] + srcFloats[1] * matrix[5] +
- srcFloats[2] * matrix[9] + matrix[13];
- dstFloats[2] = srcFloats[0] * matrix[2] + srcFloats[1] * matrix[6] +
- srcFloats[2] * matrix[10] + matrix[14];
+// x^(29/64) is a very good approximation of the true value, x^(1/2.2) (29/64 = 0.453125 vs. 1/2.2 ~= 0.4545).
+static __m128 linear_to_2dot2(__m128 x) { // Per lane: returns approximately 255 * x^(29/64).
+ // x^(-1/2)
+ __m128 x2 = _mm_rsqrt_ps(x);
- // Convert to dst gamma.
- // TODO (msarett):
- // We should use X^(1/2.2) here instead of X^(1/2). What is the impact on correctness?
- // We should be able to get closer to (1/2.2) at a small performance cost.
- dstFloats[0] = sqrtf(dstFloats[0]);
- dstFloats[1] = sqrtf(dstFloats[1]);
- dstFloats[2] = sqrtf(dstFloats[2]);
-
- *dst = SkPackARGB32NoCheck(((*src >> 24) & 0xFF),
- clamp_float_to_byte(dstFloats[0]),
- clamp_float_to_byte(dstFloats[1]),
- clamp_float_to_byte(dstFloats[2]));
+ // x^(-1/32): each rsqrt halves and negates the exponent (-1/2 -> +1/4 -> -1/8 -> +1/16 -> -1/32).
+ __m128 x32 = _mm_rsqrt_ps(_mm_rsqrt_ps(_mm_rsqrt_ps(_mm_rsqrt_ps(x2))));
- dst++;
- src++;
- }
+ // x^(+1/64)
+ __m128 x64 = _mm_rsqrt_ps(x32);
+
+ // x^(+29/64) = x^(+1/2) * x^(-1/32) * x^(-1/64), since 32/64 - 2/64 - 1/64 = 29/64.
+ // Note that we also scale to the 0-255 range.
+ // These terms can be combined more minimally with 3 muls and 1 reciprocal. However, this
+ // is faster, because it allows us to start the muls in parallel with the rsqrts.
+ __m128 scale = _mm_set1_ps(255.0f);
+ return _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(scale, _mm_rcp_ps(x2)), x32), _mm_rcp_ps(x64)); // rcp(x2) is x^(+1/2); rcp(x64) is x^(-1/64).
}
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+static __m128 clamp_0_to_255(__m128 x) { // Per lane: limits x to [0, 255]; a NaN lane comes out as 0.
+ // The order of the arguments is important here. We want to make sure that NaN
+ // clamps to zero. Note that max(NaN, 0) = 0, while max(0, NaN) = NaN.
+ return _mm_min_ps(_mm_max_ps(x, _mm_setzero_ps()), _mm_set1_ps(255.0f)); // _mm_max_ps yields its second operand when either input is NaN.
+}
-static void color_xform_2Dot2_RGBA_to_8888(uint32_t* dst, const uint32_t* src, int len,
- const float matrix[16]) {
+template <const float (&linear_from_curve)[256]>
+static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
+ const float matrix[16]) {
// Load transformation matrix.
__m128 rXgXbX = _mm_loadu_ps(&matrix[0]);
__m128 rYgYbY = _mm_loadu_ps(&matrix[4]);
__m128 rZgZbZ = _mm_loadu_ps(&matrix[8]);
- __m128 rQgQbQ = _mm_loadu_ps(&matrix[12]);
while (len >= 4) {
- // Load 4 pixels and convert them to floats.
- __m128i rgba = _mm_loadu_si128((const __m128i*) src);
- __m128i byteMask = _mm_set1_epi32(0xFF);
- __m128 reds = _mm_cvtepi32_ps(_mm_and_si128( rgba, byteMask));
- __m128 greens = _mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(rgba, 8), byteMask));
- __m128 blues = _mm_cvtepi32_ps(_mm_and_si128(_mm_srli_epi32(rgba, 16), byteMask));
-
- // Convert to linear.
- // FIXME (msarett):
- // Should we be more accurate?
- reds = _mm_mul_ps(reds, reds);
- greens = _mm_mul_ps(greens, greens);
- blues = _mm_mul_ps(blues, blues);
+ // Convert to linear. The look-up table has perfect accuracy.
+ __m128 reds = _mm_setr_ps(linear_from_curve[(src[0] >> 0) & 0xFF],
+ linear_from_curve[(src[1] >> 0) & 0xFF],
+ linear_from_curve[(src[2] >> 0) & 0xFF],
+ linear_from_curve[(src[3] >> 0) & 0xFF]);
+ __m128 greens = _mm_setr_ps(linear_from_curve[(src[0] >> 8) & 0xFF],
+ linear_from_curve[(src[1] >> 8) & 0xFF],
+ linear_from_curve[(src[2] >> 8) & 0xFF],
+ linear_from_curve[(src[3] >> 8) & 0xFF]);
+ __m128 blues = _mm_setr_ps(linear_from_curve[(src[0] >> 16) & 0xFF],
+ linear_from_curve[(src[1] >> 16) & 0xFF],
+ linear_from_curve[(src[2] >> 16) & 0xFF],
+ linear_from_curve[(src[3] >> 16) & 0xFF]);
// Apply the transformation matrix to dst gamut.
- // FIXME (msarett):
- // rQ, gQ, and bQ are almost always zero. Can we save a couple instructions?
-
- // Splat rX, rY, rZ, and rQ each across a register.
+ // Splat rX, rY, and rZ each across a register.
__m128 rX = _mm_shuffle_ps(rXgXbX, rXgXbX, 0x00);
__m128 rY = _mm_shuffle_ps(rYgYbY, rYgYbY, 0x00);
__m128 rZ = _mm_shuffle_ps(rZgZbZ, rZgZbZ, 0x00);
- __m128 rQ = _mm_shuffle_ps(rQgQbQ, rQgQbQ, 0x00);
- // dstReds = rX * reds + rY * greens + rZ * blues + rQ
+ // dstReds = rX * reds + rY * greens + rZ * blues
__m128 dstReds = _mm_mul_ps(reds, rX);
dstReds = _mm_add_ps(dstReds, _mm_mul_ps(greens, rY));
dstReds = _mm_add_ps(dstReds, _mm_mul_ps(blues, rZ));
- dstReds = _mm_add_ps(dstReds, rQ);
- // Splat gX, gY, gZ, and gQ each across a register.
+ // Splat gX, gY, and gZ each across a register.
__m128 gX = _mm_shuffle_ps(rXgXbX, rXgXbX, 0x55);
__m128 gY = _mm_shuffle_ps(rYgYbY, rYgYbY, 0x55);
__m128 gZ = _mm_shuffle_ps(rZgZbZ, rZgZbZ, 0x55);
- __m128 gQ = _mm_shuffle_ps(rQgQbQ, rQgQbQ, 0x55);
- // dstGreens = gX * reds + gY * greens + gZ * blues + gQ
+ // dstGreens = gX * reds + gY * greens + gZ * blues
__m128 dstGreens = _mm_mul_ps(reds, gX);
dstGreens = _mm_add_ps(dstGreens, _mm_mul_ps(greens, gY));
dstGreens = _mm_add_ps(dstGreens, _mm_mul_ps(blues, gZ));
- dstGreens = _mm_add_ps(dstGreens, gQ);
- // Splat bX, bY, bZ, and bQ each across a register.
+ // Splat bX, bY, and bZ each across a register.
__m128 bX = _mm_shuffle_ps(rXgXbX, rXgXbX, 0xAA);
__m128 bY = _mm_shuffle_ps(rYgYbY, rYgYbY, 0xAA);
__m128 bZ = _mm_shuffle_ps(rZgZbZ, rZgZbZ, 0xAA);
- __m128 bQ = _mm_shuffle_ps(rQgQbQ, rQgQbQ, 0xAA);
- // dstBlues = bX * reds + bY * greens + bZ * blues + bQ
+ // dstBlues = bX * reds + bY * greens + bZ * blues
__m128 dstBlues = _mm_mul_ps(reds, bX);
dstBlues = _mm_add_ps(dstBlues, _mm_mul_ps(greens, bY));
dstBlues = _mm_add_ps(dstBlues, _mm_mul_ps(blues, bZ));
- dstBlues = _mm_add_ps(dstBlues, bQ);
// Convert to dst gamma.
- // Note that the reciprocal of the reciprocal sqrt, is just a fast sqrt.
- // FIXME (msarett):
- // Should we be more accurate?
- dstReds = _mm_rcp_ps(_mm_rsqrt_ps(dstReds));
- dstGreens = _mm_rcp_ps(_mm_rsqrt_ps(dstGreens));
- dstBlues = _mm_rcp_ps(_mm_rsqrt_ps(dstBlues));
+ dstReds = linear_to_2dot2(dstReds);
+ dstGreens = linear_to_2dot2(dstGreens);
+ dstBlues = linear_to_2dot2(dstBlues);
- // Clamp floats to 0-255 range.
- dstReds = _mm_max_ps(_mm_setzero_ps(), _mm_min_ps(dstReds, _mm_set1_ps(255.0f)));
- dstGreens = _mm_max_ps(_mm_setzero_ps(), _mm_min_ps(dstGreens, _mm_set1_ps(255.0f)));
- dstBlues = _mm_max_ps(_mm_setzero_ps(), _mm_min_ps(dstBlues, _mm_set1_ps(255.0f)));
+ // Clamp floats.
+ dstReds = clamp_0_to_255(dstReds);
+ dstGreens = clamp_0_to_255(dstGreens);
+ dstBlues = clamp_0_to_255(dstBlues);
// Convert to bytes and store to memory.
- rgba = _mm_and_si128(_mm_set1_epi32(0xFF000000), rgba);
-#ifdef SK_PMCOLOR_IS_RGBA
+ __m128i rgba = _mm_set1_epi32(0xFF000000);
rgba = _mm_or_si128(rgba, _mm_cvtps_epi32(dstReds) );
rgba = _mm_or_si128(rgba, _mm_slli_epi32(_mm_cvtps_epi32(dstGreens), 8));
rgba = _mm_or_si128(rgba, _mm_slli_epi32(_mm_cvtps_epi32(dstBlues), 16));
-#else
- rgba = _mm_or_si128(rgba, _mm_cvtps_epi32(dstBlues) );
- rgba = _mm_or_si128(rgba, _mm_slli_epi32(_mm_cvtps_epi32(dstGreens), 8));
- rgba = _mm_or_si128(rgba, _mm_slli_epi32(_mm_cvtps_epi32(dstReds), 16));
-#endif
_mm_storeu_si128((__m128i*) dst, rgba);
dst += 4;
@@ -164,18 +249,96 @@ static void color_xform_2Dot2_RGBA_to_8888(uint32_t* dst, const uint32_t* src, i
len -= 4;
}
- color_xform_2Dot2_RGBA_to_8888_portable(dst, src, len, matrix);
+ while (len > 0) {
+ // Splat the red, green, and blue components.
+ __m128 r = _mm_set1_ps(linear_from_curve[(src[0] >> 0) & 0xFF]),
+ g = _mm_set1_ps(linear_from_curve[(src[0] >> 8) & 0xFF]),
+ b = _mm_set1_ps(linear_from_curve[(src[0] >> 16) & 0xFF]);
+
+ // Apply the transformation matrix to dst gamut.
+ __m128 dstPixel = _mm_mul_ps(r, rXgXbX);
+ dstPixel = _mm_add_ps(dstPixel, _mm_mul_ps(g, rYgYbY));
+ dstPixel = _mm_add_ps(dstPixel, _mm_mul_ps(b, rZgZbZ));
+
+ // Convert to dst gamma.
+ dstPixel = linear_to_2dot2(dstPixel);
+
+ // Clamp floats to 0-255 range.
+ dstPixel = clamp_0_to_255(dstPixel);
+
+ // Convert to bytes and store to memory.
+ __m128i dstInts = _mm_cvtps_epi32(dstPixel);
+ __m128i dstBytes = _mm_packus_epi16(_mm_packus_epi16(dstInts, dstInts), dstInts);
+ dstBytes = _mm_or_si128(_mm_set1_epi32(0xFF000000), dstBytes);
+ _mm_store_ss((float*) dst, _mm_castsi128_ps(dstBytes));
+
+ dst += 1;
+ src += 1;
+ len -= 1;
+ }
}
#else
-static void color_xform_2Dot2_RGBA_to_8888(uint32_t* dst, const uint32_t* src, int len,
- const float matrix[16]) {
- color_xform_2Dot2_RGBA_to_8888_portable(dst, src, len, matrix);
+static uint8_t clamp_float_to_byte(float v) { // Rounds v to the nearest integer and saturates it to [0, 255].
+ // The ordering of the logic is a little strange here in order
+ // to make sure we convert NaNs to 0: NaN fails both >= tests and reaches the final else.
+ if (v >= 254.5f) {
+ return 255;
+ } else if (v >= 0.5f) {
+ return (uint8_t) (v + 0.5f); // Adding 0.5f before the truncating cast rounds to nearest.
+ } else {
+ return 0;
+ }
+}
+
+template <const float (&linear_from_curve)[256]> // Table that decodes each source channel byte to a linear float.
+static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
+ const float matrix[16]) {
+ while (len-- > 0) { // One 32-bit pixel per iteration.
+ // Convert to linear. Channels are read from bits 0-7, 8-15 and 16-23; bits 24-31 of src are not read.
+ float srcFloats[3];
+ srcFloats[0] = linear_from_curve[(*src >> 0) & 0xFF];
+ srcFloats[1] = linear_from_curve[(*src >> 8) & 0xFF];
+ srcFloats[2] = linear_from_curve[(*src >> 16) & 0xFF];
+
+ // Convert to dst gamut: dstFloats[i] = sum over j of srcFloats[j] * matrix[4*j + i]; no other matrix entries are read.
+ float dstFloats[3];
+ dstFloats[0] = srcFloats[0] * matrix[0] + srcFloats[1] * matrix[4] +
+ srcFloats[2] * matrix[8];
+ dstFloats[1] = srcFloats[0] * matrix[1] + srcFloats[1] * matrix[5] +
+ srcFloats[2] * matrix[9];
+ dstFloats[2] = srcFloats[0] * matrix[2] + srcFloats[1] * matrix[6] +
+ srcFloats[2] * matrix[10];
+
+ // Convert to dst gamma, scaling the result to the 0-255 range.
+ // Note: pow is really, really slow. We will suffer when SSE2 is not supported.
+ dstFloats[0] = powf(dstFloats[0], (1/2.2f)) * 255.0f; // A negative input makes powf return NaN, which clamp_float_to_byte turns into 0.
+ dstFloats[1] = powf(dstFloats[1], (1/2.2f)) * 255.0f;
+ dstFloats[2] = powf(dstFloats[2], (1/2.2f)) * 255.0f;
+
+ *dst = (0xFF << 24) | // Top byte is always written as 0xFF. NOTE(review): 0xFF is a signed int here; 0xFFu would keep this shift unsigned.
+ (clamp_float_to_byte(dstFloats[2]) << 16) |
+ (clamp_float_to_byte(dstFloats[1]) << 8) |
+ (clamp_float_to_byte(dstFloats[0]) << 0);
+
+ dst++;
+ src++;
+ }
}
#endif
+static void color_xform_RGB1_srgb_to_2dot2(uint32_t* dst, const uint32_t* src, int len,
+ const float matrix[16]) {
+ color_xform_RGB1<linear_from_srgb>(dst, src, len, matrix); // Non-template entry point: binds the sRGB decode table; arguments are forwarded unchanged.
+}
+
+static void color_xform_RGB1_2dot2_to_2dot2(uint32_t* dst, const uint32_t* src, int len,
+ const float matrix[16]) {
+ color_xform_RGB1<linear_from_2dot2>(dst, src, len, matrix); // Non-template entry point: binds the gamma-2.2 decode table; arguments are forwarded unchanged.
+}
+
}
#endif // SkColorXform_opts_DEFINED
« no previous file with comments | « src/core/SkOpts.cpp ('k') | src/opts/SkOpts_sse41.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698