OLD | NEW |
1 #include "Benchmark.h" | 1 #include "Benchmark.h" |
2 #include "SkPMFloat.h" | 2 #include "SkPMFloat.h" |
3 | 3 |
4 // Used to prevent the compiler from optimizing away the whole loop. | 4 // Used to prevent the compiler from optimizing away the whole loop. |
5 volatile uint32_t blackhole = 0; | 5 volatile uint32_t blackhole = 0; |
6 | 6 |
7 // Not a great random number generator, but it's very fast. | 7 // Not a great random number generator, but it's very fast. |
8 // The code we're measuring is quite fast, so low overhead is essential. | 8 // The code we're measuring is quite fast, so low overhead is essential. |
9 static uint32_t lcg_rand(uint32_t* seed) { | 9 static uint32_t lcg_rand(uint32_t* seed) { |
10 *seed *= 1664525; | 10 *seed *= 1664525; |
11 *seed += 1013904223; | 11 *seed += 1013904223; |
12 return *seed; | 12 return *seed; |
13 } | 13 } |
14 | 14 |
| 15 // I'm having better luck getting these to constant-propagate away as template p
arameters. |
| 16 template <bool kClamp, bool kWide> |
15 struct PMFloatBench : public Benchmark { | 17 struct PMFloatBench : public Benchmark { |
16 explicit PMFloatBench(bool clamp) : fClamp(clamp) {} | 18 PMFloatBench() {} |
17 | 19 |
18 const char* onGetName() SK_OVERRIDE { return fClamp ? "SkPMFloat_clamp" : "S
kPMFloat_get"; } | 20 const char* onGetName() SK_OVERRIDE { |
| 21 switch (kClamp << 1 | kWide) { |
| 22 case 0: return "SkPMFloat_get_1x"; |
| 23 case 1: return "SkPMFloat_get_4x"; |
| 24 case 2: return "SkPMFloat_clamp_1x"; |
| 25 case 3: return "SkPMFloat_clamp_4x"; |
| 26 } |
| 27 SkFAIL("unreachable"); |
| 28 return "oh bother"; |
| 29 } |
19 bool isSuitableFor(Backend backend) SK_OVERRIDE { return backend == kNonRend
ering_Backend; } | 30 bool isSuitableFor(Backend backend) SK_OVERRIDE { return backend == kNonRend
ering_Backend; } |
20 | 31 |
21 void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE { | 32 void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE { |
22 // Unlike blackhole, junk can and probably will be a register. | 33 // Unlike blackhole, junk can and probably will be a register. |
23 uint32_t junk = 0; | 34 uint32_t junk = 0; |
24 uint32_t seed = 0; | 35 uint32_t seed = 0; |
25 for (int i = 0; i < loops; i++) { | 36 for (int i = 0; i < loops; i++) { |
| 37 SkPMColor colors[4]; |
26 #ifdef SK_DEBUG | 38 #ifdef SK_DEBUG |
27 // Our SkASSERTs will remind us that it's technically required that
we premultiply. | 39 for (int i = 0; i < 4; i++) { |
28 SkPMColor c = SkPreMultiplyColor(lcg_rand(&seed)); | 40 // Our SkASSERTs will remind us that it's technically required t
hat we premultiply. |
| 41 colors[i] = SkPreMultiplyColor(lcg_rand(&seed)); |
| 42 } |
29 #else | 43 #else |
30 // But it's a lot faster not to, and this code won't really mind the
non-PM colors. | 44 // But it's a lot faster not to, and this code won't really mind the
non-PM colors. |
31 SkPMColor c = lcg_rand(&seed); | 45 (void)lcg_rand(&seed); |
| 46 colors[0] = seed + 0; |
| 47 colors[1] = seed + 1; |
| 48 colors[2] = seed + 2; |
| 49 colors[3] = seed + 3; |
32 #endif | 50 #endif |
33 SkPMFloat pmf = SkPMFloat::FromPMColor(c); | 51 |
34 SkPMColor back = fClamp ? pmf.clamped() : pmf.get(); | 52 SkPMFloat floats[4]; |
35 junk ^= back; | 53 if (kWide) { |
| 54 SkPMFloat::From4PMColors(floats, colors); |
| 55 } else { |
| 56 for (int i = 0; i < 4; i++) { |
| 57 floats[i] = SkPMFloat::FromPMColor(colors[i]); |
| 58 } |
| 59 } |
| 60 |
| 61 SkPMColor back[4]; |
| 62 switch (kClamp << 1 | kWide) { |
| 63 case 0: for (int i = 0; i < 4; i++) { back[i] = floats[i].get();
} break; |
| 64 case 1: SkPMFloat::To4PMColors(back, floats);
break; |
| 65 case 2: for (int i = 0; i < 4; i++) { back[i] = floats[i].clampe
d(); } break; |
| 66 case 3: SkPMFloat::ClampTo4PMColors(back, floats);
break; |
| 67 } |
| 68 for (int i = 0; i < 4; i++) { |
| 69 junk ^= back[i]; |
| 70 } |
36 } | 71 } |
37 blackhole ^= junk; | 72 blackhole ^= junk; |
38 } | 73 } |
| 74 }; |
39 | 75 |
40 bool fClamp; | 76 // Extra () help DEF_BENCH not get confused by the comma inside the <>. |
41 }; | 77 DEF_BENCH(return (new PMFloatBench< true, true>);) |
42 DEF_BENCH(return new PMFloatBench( true);) | 78 DEF_BENCH(return (new PMFloatBench<false, true>);) |
43 DEF_BENCH(return new PMFloatBench(false);) | 79 DEF_BENCH(return (new PMFloatBench< true, false>);) |
| 80 DEF_BENCH(return (new PMFloatBench<false, false>);) |
OLD | NEW |