OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkTaskGroup_DEFINED | 8 #ifndef SkTaskGroup_DEFINED |
9 #define SkTaskGroup_DEFINED | 9 #define SkTaskGroup_DEFINED |
10 | 10 |
11 #include "SkTypes.h" | 11 #include "SkTypes.h" |
| 12 #include "SkTemplates.h" |
12 | 13 |
13 struct SkRunnable; | 14 struct SkRunnable; |
14 | 15 |
15 class SkTaskGroup : SkNoncopyable { | 16 class SkTaskGroup : SkNoncopyable { |
16 public: | 17 public: |
17 // Create one of these in main() to enable SkTaskGroups globally. | 18 // Create one of these in main() to enable SkTaskGroups globally. |
18 struct Enabler : SkNoncopyable { | 19 struct Enabler : SkNoncopyable { |
19 explicit Enabler(int threads = -1); // Default is system-reported core count. | 20 explicit Enabler(int threads = -1); // Default is system-reported core count. |
20 ~Enabler(); | 21 ~Enabler(); |
21 }; | 22 }; |
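The Enabler above is the whole setup story: construct one near the top of main() to spin up the shared thread pool, and let its destructor tear the pool down on exit. A minimal sketch of that pattern (the body of main() is illustrative, not from this CL):

    #include "SkTaskGroup.h"

    int main(int argc, char** argv) {
        // Default (-1) asks for the system-reported core count; pass 1 to
        // serialize every SkTaskGroup, which is handy when debugging races.
        SkTaskGroup::Enabler enabled;

        // ... create and use SkTaskGroups here ...

        return 0;  // ~Enabler() joins the pool's threads on the way out.
    }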
(...skipping 19 matching lines...) |
41 | 42 |
42 private: | 43 private: |
43 typedef void(*void_fn)(void*); | 44 typedef void(*void_fn)(void*); |
44 | 45 |
45 void add (void_fn, void* arg); | 46 void add (void_fn, void* arg); |
46 void batch(void_fn, void* args, int N, size_t stride); | 47 void batch(void_fn, void* args, int N, size_t stride); |
47 | 48 |
48 /*atomic*/ int32_t fPending; | 49 /*atomic*/ int32_t fPending; |
49 }; | 50 }; |
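The public add()/wait() interface sits in the 19 elided lines above; assuming it matches Skia's header from this era (a templated add(void(*)(T*), T*) that forwards to the private type-erased add(), plus a blocking wait()), typical use looks like the hypothetical sketch below:

    // count_to_a_million is a stand-in for any independent unit of work.
    static void count_to_a_million(int* n) {
        for (*n = 0; *n < 1000000; (*n)++) {}
    }

    void example() {
        int a, b;
        SkTaskGroup tg;
        tg.add(count_to_a_million, &a);  // likely runs on another thread
        tg.add(count_to_a_million, &b);
        tg.wait();                       // block until both tasks have run
        // a and b are now safe to read.
    }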
50 | 51 |
| 52 // Returns best estimate of number of CPU cores available to use. |
| 53 int sk_num_cores(); |
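sk_num_cores() is only declared here; the definition lives in the .cpp using platform-specific calls. As a rough sketch of the contract, a portable C++11 stand-in (not Skia's actual implementation) could be:

    #include <thread>

    int sk_num_cores() {
        // hardware_concurrency() may report 0 when the count is unknown;
        // fall back to 1 so callers can safely divide by the result.
        unsigned n = std::thread::hardware_concurrency();
        return n > 0 ? (int)n : 1;
    }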
| 54 |
| 55 // Call f(i) for i in [0, end). |
| 56 template <typename Func> |
| 57 void sk_parallel_for(int end, const Func& f) { |
| 58 if (end <= 0) { return; } |
| 59 |
| 60 struct Chunk { |
| 61 const Func* f; |
| 62 int start, end; |
| 63 }; |
| 64 |
| 65 // TODO(mtklein): this chunking strategy could probably use some tuning. |
| 66 int max_chunks = sk_num_cores() * 2, |
| 67 stride = (end + max_chunks - 1 ) / max_chunks, |
| 68 nchunks = (end + stride - 1 ) / stride; |
| 69 SkASSERT(nchunks <= max_chunks); |
| 70 |
| 71 // With the chunking strategy above this won't malloc until we have a machine with >512 cores. |
| 72 SkAutoSTMalloc<1024, Chunk> chunks(nchunks); |
| 73 |
| 74 for (int i = 0; i < nchunks; i++) { |
| 75 Chunk& c = chunks[i]; |
| 76 c.f = &f; |
| 77 c.start = i * stride; |
| 78 c.end = SkTMin(c.start + stride, end); |
| 79 SkASSERT(c.start < c.end); // Nothing will break if start >= end, but it's a wasted chunk. |
| 80 } |
| 81 |
| 82 void(*run_chunk)(Chunk*) = [](Chunk* c) { |
| 83 for (int i = c->start; i < c->end; i++) { |
| 84 (*c->f)(i); |
| 85 } |
| 86 }; |
| 87 SkTaskGroup().batch(run_chunk, chunks.get(), nchunks); |
| 88 } |
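To make the chunking arithmetic concrete: on a 4-core machine max_chunks is 8, so with end = 100 we get stride = (100 + 7) / 8 = 13 and nchunks = (100 + 12) / 13 = 8, giving chunks [0,13), [13,26), ..., [91,100). Since sk_parallel_for accepts any callable taking an int, a lambda works; the pixel-brightening function below is a hypothetical example, not part of this CL:

    #include <stdint.h>
    #include "SkTaskGroup.h"

    void brighten(uint8_t* pixels, int count) {
        // Each iteration touches only pixels[i], so the loop body is safe
        // to run chunk-by-chunk across threads.
        sk_parallel_for(count, [=](int i) {
            pixels[i] = (pixels[i] <= 245) ? pixels[i] + 10 : 255;
        });
    }

Note that sk_parallel_for blocks until every chunk has run: the temporary SkTaskGroup's destructor (declared in the elided lines) presumably waits, which is what keeps the stack-allocated Chunk array alive for the duration of the work.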
| 89 |
51 #endif//SkTaskGroup_DEFINED | 90 #endif//SkTaskGroup_DEFINED |