OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "Benchmark.h" | 8 #include "Benchmark.h" |
9 #include "SkPMFloat.h" | 9 #include "SkPMFloat.h" |
10 | 10 |
11 // Used to prevent the compiler from optimizing away the whole loop. | 11 // Used to prevent the compiler from optimizing away the whole loop. |
12 volatile uint32_t blackhole = 0; | 12 volatile uint32_t blackhole = 0; |
13 | 13 |
14 // Not a great random number generator, but it's very fast. | 14 // Not a great random number generator, but it's very fast. |
15 // The code we're measuring is quite fast, so low overhead is essential. | 15 // The code we're measuring is quite fast, so low overhead is essential. |
16 static uint32_t lcg_rand(uint32_t* seed) { | 16 static uint32_t lcg_rand(uint32_t* seed) { |
17 *seed *= 1664525; | 17 *seed *= 1664525; |
18 *seed += 1013904223; | 18 *seed += 1013904223; |
19 return *seed; | 19 return *seed; |
20 } | 20 } |
21 | 21 |
22 // I'm having better luck getting these to constant-propagate away as template p
arameters. | 22 // I'm having better luck getting these to constant-propagate away as template p
arameters. |
23 template <bool kClamp, bool kWide> | 23 struct PMFloatRoundtripBench : public Benchmark { |
24 struct PMFloatGetSetBench : public Benchmark { | 24 PMFloatRoundtripBench() {} |
25 PMFloatGetSetBench() {} | |
26 | 25 |
27 const char* onGetName() override { | 26 const char* onGetName() override { return "SkPMFloat_roundtrip"; } |
28 switch (kClamp << 1 | kWide) { | |
29 case 0: return "SkPMFloat_get_1x"; | |
30 case 1: return "SkPMFloat_get_4x"; | |
31 case 2: return "SkPMFloat_clamp_1x"; | |
32 case 3: return "SkPMFloat_clamp_4x"; | |
33 } | |
34 SkFAIL("unreachable"); | |
35 return "oh bother"; | |
36 } | |
37 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } | 27 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } |
38 | 28 |
39 void onDraw(const int loops, SkCanvas* canvas) override { | 29 void onDraw(const int loops, SkCanvas* canvas) override { |
40 // Unlike blackhole, junk can and probably will be a register. | 30 // Unlike blackhole, junk can and probably will be a register. |
41 uint32_t junk = 0; | 31 uint32_t junk = 0; |
42 uint32_t seed = 0; | 32 uint32_t seed = 0; |
43 for (int i = 0; i < loops; i++) { | 33 for (int i = 0; i < loops; i++) { |
44 SkPMColor colors[4]; | 34 SkPMColor color; |
45 #ifdef SK_DEBUG | 35 #ifdef SK_DEBUG |
46 for (int i = 0; i < 4; i++) { | 36 // Our SkASSERTs will remind us that it's technically required that
we premultiply. |
47 // Our SkASSERTs will remind us that it's technically required t
hat we premultiply. | 37 color = SkPreMultiplyColor(lcg_rand(&seed)); |
48 colors[i] = SkPreMultiplyColor(lcg_rand(&seed)); | |
49 } | |
50 #else | 38 #else |
51 // But it's a lot faster not to, and this code won't really mind the
non-PM colors. | 39 // But it's a lot faster not to, and this code won't really mind the
non-PM colors. |
52 (void)lcg_rand(&seed); | 40 color = lcg_rand(&seed); |
53 colors[0] = seed + 0; | |
54 colors[1] = seed + 1; | |
55 colors[2] = seed + 2; | |
56 colors[3] = seed + 3; | |
57 #endif | 41 #endif |
58 | 42 |
59 SkPMFloat fa,fb,fc,fd; | 43 auto f = SkPMFloat::FromPMColor(color); |
60 if (kWide) { | 44 SkPMColor back = f.round(); |
61 SkPMFloat::From4PMColors(colors, &fa, &fb, &fc, &fd); | 45 junk ^= back; |
62 } else { | |
63 fa = SkPMFloat::FromPMColor(colors[0]); | |
64 fb = SkPMFloat::FromPMColor(colors[1]); | |
65 fc = SkPMFloat::FromPMColor(colors[2]); | |
66 fd = SkPMFloat::FromPMColor(colors[3]); | |
67 } | |
68 | |
69 SkPMColor back[4]; | |
70 switch (kClamp << 1 | kWide) { | |
71 case 0: { | |
72 back[0] = fa.round(); | |
73 back[1] = fb.round(); | |
74 back[2] = fc.round(); | |
75 back[3] = fd.round(); | |
76 } break; | |
77 case 1: SkPMFloat::RoundTo4PMColors(fa, fb, fc, fd, back); break
; | |
78 case 2: { | |
79 back[0] = fa.roundClamp(); | |
80 back[1] = fb.roundClamp(); | |
81 back[2] = fc.roundClamp(); | |
82 back[3] = fd.roundClamp(); | |
83 } break; | |
84 case 3: SkPMFloat::RoundClampTo4PMColors(fa, fb, fc, fd, back);
break; | |
85 } | |
86 for (int i = 0; i < 4; i++) { | |
87 junk ^= back[i]; | |
88 } | |
89 } | 46 } |
90 blackhole ^= junk; | 47 blackhole ^= junk; |
91 } | 48 } |
92 }; | 49 }; |
93 | 50 DEF_BENCH(return new PMFloatRoundtripBench;) |
94 // Extra () help DEF_BENCH not get confused by the comma inside the <>. | |
95 DEF_BENCH(return (new PMFloatGetSetBench< true, true>);) | |
96 DEF_BENCH(return (new PMFloatGetSetBench<false, true>);) | |
97 DEF_BENCH(return (new PMFloatGetSetBench< true, false>);) | |
98 DEF_BENCH(return (new PMFloatGetSetBench<false, false>);) | |
99 | 51 |
100 struct PMFloatGradientBench : public Benchmark { | 52 struct PMFloatGradientBench : public Benchmark { |
101 const char* onGetName() override { return "PMFloat_gradient"; } | 53 const char* onGetName() override { return "PMFloat_gradient"; } |
102 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } | 54 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } |
103 | 55 |
104 SkPMColor fDevice[100]; | 56 SkPMColor fDevice[100]; |
105 void onDraw(const int loops, SkCanvas*) override { | 57 void onDraw(const int loops, SkCanvas*) override { |
106 Sk4f c0 = SkPMFloat::FromARGB(255, 255, 0, 0), | 58 Sk4f c0 = SkPMFloat::FromARGB(1, 1, 0, 0), |
107 c1 = SkPMFloat::FromARGB(255, 0, 0, 255), | 59 c1 = SkPMFloat::FromARGB(1, 0, 0, 1), |
108 dc = c1 - c0, | 60 dc = c1 - c0, |
109 fx(0.1f), | 61 fx(0.1f), |
110 dx(0.002f), | 62 dx(0.002f), |
111 dcdx(dc*dx), | 63 dcdx(dc*dx), |
112 dcdx4(dcdx+dcdx+dcdx+dcdx); | 64 dcdx4(dcdx+dcdx+dcdx+dcdx); |
113 | 65 |
114 for (int n = 0; n < loops; n++) { | 66 for (int n = 0; n < loops; n++) { |
115 Sk4f a = c0 + dc*fx + Sk4f(0.5f), // The +0.5f lets us call trunc()
instead of get(). | 67 Sk4f a = c0 + dc*fx, |
116 b = a + dcdx, | 68 b = a + dcdx, |
117 c = b + dcdx, | 69 c = b + dcdx, |
118 d = c + dcdx; | 70 d = c + dcdx; |
119 for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) { | 71 for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) { |
120 fDevice[i+0] = SkPMFloat(a).trunc(); | 72 fDevice[i+0] = SkPMFloat(a).round(); |
121 fDevice[i+1] = SkPMFloat(b).trunc(); | 73 fDevice[i+1] = SkPMFloat(b).round(); |
122 fDevice[i+2] = SkPMFloat(c).trunc(); | 74 fDevice[i+2] = SkPMFloat(c).round(); |
123 fDevice[i+3] = SkPMFloat(d).trunc(); | 75 fDevice[i+3] = SkPMFloat(d).round(); |
124 a = a + dcdx4; | 76 a = a + dcdx4; |
125 b = b + dcdx4; | 77 b = b + dcdx4; |
126 c = c + dcdx4; | 78 c = c + dcdx4; |
127 d = d + dcdx4; | 79 d = d + dcdx4; |
128 } | 80 } |
129 } | 81 } |
130 } | 82 } |
131 }; | 83 }; |
132 | 84 |
133 DEF_BENCH(return new PMFloatGradientBench;) | 85 DEF_BENCH(return new PMFloatGradientBench;) |
OLD | NEW |