Index: src/core/SkTaskGroup.h
diff --git a/src/core/SkTaskGroup.h b/src/core/SkTaskGroup.h
index 8c7369da25caa9b124bc3486f74ce701c086095e..3af64d775312f999c495302fd332343ad91c8b01 100644
--- a/src/core/SkTaskGroup.h
+++ b/src/core/SkTaskGroup.h
@@ -10,6 +10,7 @@
 #include "SkTypes.h"
 #include "SkAtomics.h"
+#include "SkTemplates.h"
 
 struct SkRunnable;
 
 class SkTaskGroup : SkNoncopyable {
@@ -49,4 +50,44 @@ private:
     SkAtomic<int32_t> fPending;
 };
 
+// Returns best estimate of number of CPU cores available to use.
+int sk_num_cores();
+
+// Call f(i) for i in [0, end).
+template <typename Func>
+void sk_parallel_for(int end, const Func& f) {
+    if (end <= 0) { return; }
+
+    struct Chunk {
+        const Func* f;
+        int start, end;
+    };
+
+    // TODO(mtklein): this chunking strategy could probably use some tuning.
+    int max_chunks = sk_num_cores() * 2,
+        stride     = (end + max_chunks - 1) / max_chunks,
+        nchunks    = (end + stride - 1) / stride;
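+    // e.g. on a 4-core machine, max_chunks == 8; with end == 10, stride == 2 and nchunks == 5.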
+    SkASSERT(nchunks <= max_chunks);
+
+    // With the chunking strategy above this won't malloc until we have a machine with >512 cores.
+    SkAutoSTMalloc<1024, Chunk> chunks(nchunks);
+
+    for (int i = 0; i < nchunks; i++) {
+        Chunk& c = chunks[i];
+        c.f     = &f;
+        c.start = i * stride;
+        c.end   = SkTMin(c.start + stride, end);
+        SkASSERT(c.start < c.end);  // Nothing will break if start >= end, but it's a wasted chunk.
+    }
+
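+    // A captureless lambda converts to a plain function pointer, the form batch() accepts below.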
+    void(*run_chunk)(Chunk*) = [](Chunk* c) {
+        for (int i = c->start; i < c->end; i++) {
+            (*c->f)(i);
+        }
+    };
+    SkTaskGroup().batch(run_chunk, chunks.get(), nchunks);
+}
+
 #endif//SkTaskGroup_DEFINED
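
A minimal usage sketch (not from the patch; scale_all and its arguments are
hypothetical). Any callable works, including capturing lambdas, since f is
taken by const reference:

    #include "SkTaskGroup.h"

    void scale_all(SkPoint pts[], int n, SkScalar k) {
        // Each index in [0, n) is visited exactly once, possibly on different threads.
        sk_parallel_for(n, [&](int i) {
            pts[i].scale(k);
        });
    }

Note that correctness relies on the temporary SkTaskGroup waiting for its
pending work when it is destroyed at the end of the batch statement: chunks
lives on the caller's stack, so returning earlier would leave workers reading
freed memory.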
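sk_num_cores() is only declared in this header; its definition lives in a .cpp
not shown here. A rough sketch of one portable way it could be implemented (an
assumption, not the patch's actual definition):

    #include <thread>

    int sk_num_cores() {
        // hardware_concurrency() may return 0 when the count is unknown; fall back to 1.
        unsigned n = std::thread::hardware_concurrency();
        return n > 0 ? (int)n : 1;
    }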