Index: src/core/SkTaskGroup.h
diff --git a/src/core/SkTaskGroup.h b/src/core/SkTaskGroup.h
index 14a52c6aac1aadd1d3393466200fec43f4bf0e45..9011a15f5449e3c6776a47e8fc6e696d75b31663 100644
--- a/src/core/SkTaskGroup.h
+++ b/src/core/SkTaskGroup.h
@@ -9,6 +9,7 @@
 #define SkTaskGroup_DEFINED
 
 #include "SkTypes.h"
+#include "SkTemplates.h"
 
 struct SkRunnable;
 
@@ -48,4 +49,46 @@ private:
     /*atomic*/ int32_t fPending;
 };
 
+// Returns the best estimate of the number of CPU cores available to use.
+int sk_num_cores();
+
+// Call f(i) for i in [0, end).
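+// f may be called concurrently from multiple threads, so it must be safe to run that way.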
+template <typename Func>
+void sk_parallel_for(int end, const Func& f) {
+    if (end <= 0) { return; }
+
+    struct Chunk {
+        const Func* f;
+        int start, end;
+    };
+
+    // TODO(mtklein): this chunking strategy could probably use some tuning.
+    int max_chunks = sk_num_cores() * 2,
+        stride     = (end + max_chunks - 1) / max_chunks,
+        nchunks    = (end + stride     - 1) / stride;
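+    // For example, with 8 cores and end == 100: max_chunks == 16, stride == 7, nchunks == 15.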
+    SkASSERT(nchunks <= max_chunks);
+
+    // With the chunking strategy above this won't malloc until we have a machine with >512 cores.
+    SkAutoSTMalloc<1024, Chunk> chunks(nchunks);
+
+    for (int i = 0; i < nchunks; i++) {
+        Chunk& c = chunks[i];
+        c.f     = &f;
+        c.start = i * stride;
+        c.end   = SkTMin(c.start + stride, end);
+        SkASSERT(c.start < c.end);  // Nothing will break if start >= end, but it's a wasted chunk.
+    }
+
+    void(*run_chunk)(Chunk*) = [](Chunk* c) {
+        for (int i = c->start; i < c->end; i++) {
+            (*c->f)(i);
+        }
+    };
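+    // The temporary SkTaskGroup runs all the chunks, and its destructor waits for them to
+    // finish, so sk_parallel_for does not return until every f(i) has run.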
+    SkTaskGroup().batch(run_chunk, chunks.get(), nchunks);
+}
+
 #endif//SkTaskGroup_DEFINED
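
For reference, a minimal usage sketch of the new helper (not part of the patch). It
assumes a thread pool has been set up for SkTaskGroup, e.g. via the SkTaskGroup::Enabler
mechanism of this era of the API; without one, the chunks should simply run serially on
the calling thread:

    #include "SkTaskGroup.h"

    // Square every element of vals in place, spreading the work across cores.
    // Each call writes to a distinct index, so the lambda is safe to run concurrently.
    static void square_all(float vals[], int n) {
        sk_parallel_for(n, [&](int i) {
            vals[i] *= vals[i];
        });
    }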