Index: src/core/SkTaskGroup.h
diff --git a/src/core/SkTaskGroup.h b/src/core/SkTaskGroup.h
index 8c7369da25caa9b124bc3486f74ce701c086095e..3af64d775312f999c495302fd332343ad91c8b01 100644
--- a/src/core/SkTaskGroup.h
+++ b/src/core/SkTaskGroup.h
@@ -10,6 +10,7 @@
 
 #include "SkTypes.h"
 #include "SkAtomics.h"
+#include "SkTemplates.h"
 
 struct SkRunnable;
 
@@ -49,4 +50,42 @@ private:
     SkAtomic<int32_t> fPending;
 };
 
+// Returns best estimate of number of CPU cores available to use.
+int sk_num_cores();
+
+// Call f(i) for i in [0, end).
+template <typename Func>
+void sk_parallel_for(int end, const Func& f) {
+    if (end <= 0) { return; }
+
+    struct Chunk {
+        const Func* f;
+        int start, end;
+    };
+
+    // TODO(mtklein): this chunking strategy could probably use some tuning.
+    int max_chunks = sk_num_cores() * 2,
+        stride = (end + max_chunks - 1) / max_chunks,
+        nchunks = (end + stride - 1) / stride;
+    SkASSERT(nchunks <= max_chunks);
+
+    // With the chunking strategy above this won't malloc until we have a machine with >512 cores.
+    SkAutoSTMalloc<1024, Chunk> chunks(nchunks);
+
+    for (int i = 0; i < nchunks; i++) {
+        Chunk& c = chunks[i];
+        c.f = &f;
+        c.start = i * stride;
+        c.end = SkTMin(c.start + stride, end);
+        SkASSERT(c.start < c.end);  // Nothing will break if start >= end, but it's a wasted chunk.
+    }
+
+    void (*run_chunk)(Chunk*) = [](Chunk* c) {
+        for (int i = c->start; i < c->end; i++) {
+            (*c->f)(i);
+        }
+    };
+    SkTaskGroup().batch(run_chunk, chunks.get(), nchunks);
+}
+
 #endif//SkTaskGroup_DEFINED
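
A minimal usage sketch, not part of the patch above: the file-scope function scale_all and its data/count/factor parameters are hypothetical, and it assumes the header is included as "SkTaskGroup.h"; only sk_parallel_for() and its "call f(i) for i in [0, end)" contract come from the diff.

    #include "SkTaskGroup.h"

    // Scale every element of an array in parallel.  The lambda is the Func;
    // each chunk calls it once per index in that chunk's [start, end) range.
    static void scale_all(float* data, int count, float factor) {
        sk_parallel_for(count, [&](int i) {
            data[i] *= factor;
        });
    }

Because chunks is a function-local SkAutoSTMalloc, the temporary SkTaskGroup() has to finish every batched chunk before sk_parallel_for() returns (otherwise the workers would be left pointing at freed memory), so the call above behaves synchronously. To make the chunking concrete: on a 4-core machine max_chunks is 8, so for end = 100 the stride is (100 + 7) / 8 = 13 and nchunks is (100 + 12) / 13 = 8, giving chunks [0, 13), [13, 26), ..., [91, 100).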