Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(94)

Side by Side Diff: native_client_sdk/src/examples/demo/life_simd/life.cc

Issue 451883002: Update SIMD version of life demo. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <assert.h>
6 #include <math.h>
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/time.h>
12 #include <unistd.h>
13
14 #include <ppapi/c/ppb_input_event.h>
15 #include <ppapi/cpp/fullscreen.h>
16 #include <ppapi/cpp/input_event.h>
17 #include <ppapi/cpp/var.h>
18 #include <ppapi/cpp/var_array.h>
19 #include <ppapi/cpp/var_array_buffer.h>
20 #include <ppapi/cpp/var_dictionary.h>
21
22 #include "ppapi_simple/ps.h"
23 #include "ppapi_simple/ps_context_2d.h"
24 #include "ppapi_simple/ps_event.h"
25 #include "ppapi_simple/ps_instance.h"
26 #include "ppapi_simple/ps_interface.h"
27 #include "ppapi_simple/ps_main.h"
28 #include "sdk_util/macros.h"
29 #include "sdk_util/thread_pool.h"
30
31 using namespace sdk_util; // For sdk_util::ThreadPool
32
33 namespace {
34
35 #define INLINE inline __attribute__((always_inline))
36
37 // BGRA helper macro, for constructing a pixel for a BGRA buffer.
38 #define MakeBGRA(b, g, r, a) \
39 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
40
41 const int kFramesToBenchmark = 100;
42 const int kCellAlignment = 0x10;
43
44 // 128 bit vector types
45 typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
46
47 // Helper function to broadcast x across 16 element vector.
48 INLINE u8x16_t broadcast(uint8_t x) {
49 u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
50 return r;
51 }
52
53 // Convert a count value into a live (green) or dead color value.
54 const uint32_t kNeighborColors[] = {
55 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
56 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
57 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
58 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
59 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
60 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
61 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
62 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
63 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
64 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
65 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
66 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
67 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
68 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
69 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
70 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
71 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
72 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
73 };
74
75 // These represent the new health value of a cell based on its neighboring
76 // values. The health is binary: either alive or dead.
77 const uint8_t kIsAlive[] = {
78 0, 0, 0, 0, 0, 1, 1, 1, 0,
79 0, 0, 0, 0, 0, 0, 0, 0, 0
80 };
81
82 // Timer helper for benchmarking. Returns seconds elapsed since program start,
83 // as a double.
84 timeval start_tv;
85 int start_tv_retv = gettimeofday(&start_tv, NULL);
86
87 inline double getseconds() {
88 const double usec_to_sec = 0.000001;
89 timeval tv;
90 if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL)))
91 return (tv.tv_sec - start_tv.tv_sec) + tv.tv_usec * usec_to_sec;
92 return 0.0;
93 }
94 } // namespace
95
96
97 class Life {
98 public:
99 Life();
100 virtual ~Life();
101 // Runs a tick of the simulations, update 2D output.
102 void Update();
103 // Handle event from user, or message from JS.
104 void HandleEvent(PSEvent* ps_event);
105 private:
106 void UpdateContext();
107 void DrawCell(int32_t x, int32_t y);
108 void ProcessTouchEvent(pp::TouchInputEvent touches);
binji 2014/08/08 00:31:40 const pp::TouchInputEvent&
nfullagar 2014/08/08 20:51:48 Done.
109 void PostUpdateMessage(const char* message, double value);
110 void StartBenchmark();
111 void EndBenchmark();
112 void Stir();
113 void wSimulate(int y);
114 static void wSimulateEntry(int y, void* data);
115 void Simulate();
116
117 bool simd_;
118 bool multithread_;
119 bool benchmarking_;
120 int benchmark_frame_counter_;
121 double bench_start_time_;
122 double bench_end_time_;
123 uint8_t* cell_in_;
124 uint8_t* cell_out_;
125 int32_t cell_stride_;
126 int32_t width_;
127 int32_t height_;
128 PSContext2D_t* ps_context_;
129 ThreadPool* workers_;
130 };
131
132 Life::Life() :
133 simd_(true),
134 multithread_(true),
135 benchmarking_(false),
136 benchmark_frame_counter_(0),
137 bench_start_time_(0.0),
138 bench_end_time_(0.0),
139 cell_in_(NULL),
140 cell_out_(NULL),
141 cell_stride_(0),
142 width_(0),
143 height_(0) {
144 ps_context_ = PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL);
145 // Query system for number of processors via sysconf()
146 int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
147 if (num_threads < 2)
148 num_threads = 2;
149 workers_ = new ThreadPool(num_threads);
150 PSEventSetFilter(PSE_ALL);
151 }
152
153 Life::~Life() {
154 delete workers_;
155 PSContext2DFree(ps_context_);
156 }
157
158 void Life::UpdateContext() {
159 cell_stride_ = (ps_context_->width + kCellAlignment - 1) &
160 ~(kCellAlignment - 1);
binji 2014/08/08 00:31:40 whoops, bug in the previous version, hm?
161 size_t size = cell_stride_ * ps_context_->height;
162
163 if (ps_context_->width != width_ || ps_context_->height != height_) {
164 free(cell_in_);
165 free(cell_out_);
166
167 // Create a new context
168 void* in_buffer = NULL;
169 void* out_buffer = NULL;
170 // alloc buffers aligned on 16 bytes
171 posix_memalign(&in_buffer, kCellAlignment, size);
172 posix_memalign(&out_buffer, kCellAlignment, size);
173 cell_in_ = (uint8_t*) in_buffer;
174 cell_out_ = (uint8_t*) out_buffer;
175
176 memset(cell_out_, 0, size);
177 for (size_t index = 0; index < size; index++) {
178 cell_in_[index] = rand() & 1;
179 }
180 width_ = ps_context_->width;
181 height_ = ps_context_->height;
182 }
183 }
184
185 void Life::DrawCell(int32_t x, int32_t y) {
186 if (!cell_in_) return;
187 if (x > 0 && x < ps_context_->width - 1 &&
188 y > 0 && y < ps_context_->height - 1) {
189 cell_in_[x - 1 + y * cell_stride_] = 1;
190 cell_in_[x + 1 + y * cell_stride_] = 1;
191 cell_in_[x + (y - 1) * cell_stride_] = 1;
192 cell_in_[x + (y + 1) * cell_stride_] = 1;
193 }
194 }
195
196 void Life::ProcessTouchEvent(pp::TouchInputEvent touches) {
197 uint32_t count = touches.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES);
198 uint32_t i, j;
199 for (i = 0; i < count; i++) {
200 pp::TouchPoint touch =
201 touches.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES, i);
202 int radius = (int)(touch.radii().x());
203 int x = (int)(touch.position().x());
204 int y = (int)(touch.position().y());
205 // num = 1/100th the area of touch point
206 uint32_t num = (uint32_t)(M_PI * radius * radius / 100.0f);
207 for (j = 0; j < num; j++) {
208 int dx = rand() % (radius * 2) - radius;
209 int dy = rand() % (radius * 2) - radius;
210 // only plot random cells within the touch area
211 if (dx * dx + dy * dy <= radius * radius)
212 DrawCell(x + dx, y + dy);
213 }
214 }
215 }
216
217 void Life::PostUpdateMessage(const char* message_name, double value) {
218 pp::VarDictionary message;
219 message.Set("message", message_name);
220 message.Set("value", value);
221 PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message.pp_var());
222 }
223
224 void Life::StartBenchmark() {
225 printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n",
226 simd_ ? "enabled" : "disabled",
227 multithread_ ? "enabled" : "disabled",
228 ps_context_->width,
229 ps_context_->height);
230 benchmarking_ = true;
231 bench_start_time_ = getseconds();
232 benchmark_frame_counter_ = kFramesToBenchmark;
233 }
234
235 void Life::EndBenchmark() {
236 double total_time;
237 bench_end_time_ = getseconds();
238 benchmarking_ = false;
239 total_time = bench_end_time_ - bench_start_time_;
240 printf("Finished - benchmark took %f seconds\n", total_time);
241 // Send benchmark result to JS.
242 PostUpdateMessage("benchmark_result", total_time);
243 }
244
245 void Life::HandleEvent(PSEvent* ps_event) {
246 // Give the 2D context a chance to process the event.
247 if (0 != PSContext2DHandleEvent(ps_context_, ps_event)) {
248 UpdateContext();
249 return;
250 }
251
252 switch(ps_event->type) {
253
254 case PSE_INSTANCE_HANDLEINPUT: {
255 pp::InputEvent event(ps_event->as_resource);
256
257 switch(event.GetType()) {
258 case PP_INPUTEVENT_TYPE_MOUSEDOWN:
259 case PP_INPUTEVENT_TYPE_MOUSEMOVE: {
260 pp::MouseInputEvent mouse = pp::MouseInputEvent(event);
261 // If the button is down, draw
262 if (mouse.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) {
263 PP_Point location = mouse.GetPosition();
264 DrawCell(location.x, location.y);
265 }
266 break;
267 }
268
269 case PP_INPUTEVENT_TYPE_TOUCHSTART:
270 case PP_INPUTEVENT_TYPE_TOUCHMOVE: {
271 pp::TouchInputEvent touches = pp::TouchInputEvent(event);
272 ProcessTouchEvent(touches);
273 break;
274 }
275
276 case PP_INPUTEVENT_TYPE_KEYDOWN: {
277 pp::Fullscreen fullscreen(PSInstance::GetInstance());
278 bool isFullscreen = fullscreen.IsFullscreen();
279 fullscreen.SetFullscreen(!isFullscreen);
280 break;
281 }
282
283 default:
284 break;
285 }
286 break; // case PSE_INSTANCE_HANDLEINPUT
287 }
288
289 case PSE_INSTANCE_HANDLEMESSAGE: {
290 // Convert Pepper Simple message to PPAPI C++ vars
291 pp::Var var(ps_event->as_var);
292 if (var.is_dictionary()) {
293 pp::VarDictionary dictionary(var);
294 std::string message = dictionary.Get("message").AsString();
295 if (message == "run_benchmark" && !benchmarking_) {
296 StartBenchmark();
297 } else if (message == "set_simd") {
298 std::string value = dictionary.Get("value").AsString();
299 simd_ = value == "enable";
300 } else if (message == "set_threading") {
301 std::string value = dictionary.Get("value").AsString();
302 multithread_ = value == "enable";
303 }
304 }
305 break; // case PSE_INSTANCE_HANDLEMESSAGE
306 }
307
308 default:
309 break;
310 }
311 }
312
313 void Life::Stir() {
314 int32_t width = ps_context_->width;
315 int32_t height = ps_context_->height;
316 int32_t stride = cell_stride_;
317 int32_t i;
318 if (cell_in_ == NULL || cell_out_ == NULL)
319 return;
320
321 for (i = 0; i < width; ++i) {
322 cell_in_[i] = rand() & 1;
323 cell_in_[i + (height - 1) * stride] = rand() & 1;
324 }
325 for (i = 0; i < height; ++i) {
326 cell_in_[i * stride] = rand() & 1;
327 cell_in_[i * stride + (width - 1)] = rand() & 1;
328 }
329 }
330
331 void Life::wSimulate(int y) {
332 // Don't run simulation on top and bottom borders
333 if (y < 1 || y >= ps_context_->height - 1)
334 return;
335
336 // Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
337 // wide perimeter is excluded from the simulation update; only cells from
338 // x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
339 uint8_t *src0 = (cell_in_ + (y - 1) * cell_stride_);
340 uint8_t *src1 = src0 + cell_stride_;
341 uint8_t *src2 = src1 + cell_stride_;
342 uint8_t *dst = (cell_out_ + y * cell_stride_) + 1;
343 uint32_t *pixels = static_cast<uint32_t *>(ps_context_->data);
344 uint32_t *pixel_line = static_cast<uint32_t*>
binji 2014/08/08 00:31:40 cast isn't needed, right?
345 (pixels + y * ps_context_->stride / sizeof(uint32_t));
346 int32_t x = 1;
347
348 if (simd_) {
349 const u8x16_t kOne = broadcast(1);
350 const u8x16_t kFour = broadcast(4);
351 const u8x16_t kEight = broadcast(8);
352 const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
353
354 // Prime the src
355 u8x16_t src00 = *reinterpret_cast<u8x16_t*>(&src0[0]);
356 u8x16_t src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
357 u8x16_t src10 = *reinterpret_cast<u8x16_t*>(&src1[0]);
358 u8x16_t src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
359 u8x16_t src20 = *reinterpret_cast<u8x16_t*>(&src2[0]);
360 u8x16_t src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
361
362 // This inner loop is SIMD - each loop iteration will process 16 cells.
363 for (; (x + 15) < (ps_context_->width - 1); x += 16) {
364
365 // Construct jittered source temps, using __builtin_shufflevector(..) to
366 // extract a shifted 16 element vector from the 32 element concatenation
367 // of two source vectors.
368 u8x16_t src0j0 = src00;
369 u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
370 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
371 u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
372 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
373 u8x16_t src1j0 = src10;
374 u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
375 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
376 u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
377 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
378 u8x16_t src2j0 = src20;
379 u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
380 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
381 u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
382 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
383
384 // Sum the jittered sources to construct neighbor count.
385 u8x16_t count = src0j0 + src0j1 + src0j2 +
386 src1j0 + + src1j2 +
387 src2j0 + src2j1 + src2j2;
388 // Add the center cell.
389 count = count + count + src1j1;
390 // If count > 4 and < 8, center cell will be alive in the next frame.
391 u8x16_t alive1 = count > kFour;
392 u8x16_t alive2 = count < kEight;
393 // Intersect the two comparisons from above.
394 u8x16_t alive = alive1 & alive2;
395
396 // At this point, alive[x] will be one of two values:
397 // 0x00 for a dead cell
398 // 0xFF for an alive cell.
399 //
400 // Next, convert alive cells to green pixel color.
401 // Use __builtin_shufflevector(..) to construct output pixels from
402 // concantination of alive vector and kZero255 const vector.
403 // Indices 0..15 select the 16 cells from alive vector.
404 // Index 16 is zero constant from kZero255 constant vector.
405 // Index 17 is 255 constant from kZero255 constant vector.
406 // Output pixel color values are in BGRABGRABGRABGRA order.
407 // Since each pixel needs 4 bytes of color information, 16 cells will
408 // need to expand to 4 seperate 16 byte pixel splats.
409 u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
410 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
411 u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
412 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
413 u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
414 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
415 u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
416 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
417
418 // Write 16 pixels to output pixel buffer.
419 *reinterpret_cast<u8x16_t*>(pixel_line + 0) = pixel0_3;
420 *reinterpret_cast<u8x16_t*>(pixel_line + 4) = pixel4_7;
421 *reinterpret_cast<u8x16_t*>(pixel_line + 8) = pixel8_11;
422 *reinterpret_cast<u8x16_t*>(pixel_line + 12) = pixel12_15;
423
424 // Convert alive mask to 1 or 0 and store in destination cell array.
425 *reinterpret_cast<u8x16_t*>(dst) = alive & kOne;
426
427 // Increment pointers.
428 pixel_line += 16;
429 dst += 16;
430 src0 += 16;
431 src1 += 16;
432 src2 += 16;
433
434 // Shift source over by 16 cells and read the next 16 cells.
435 src00 = src01;
436 src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
437 src10 = src11;
438 src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
439 src20 = src21;
440 src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
441 }
442 }
443
444 // The SIMD loop above does 16 cells at a time. The loop below is the
445 // regular version which processes one cell at a time. It is used to
446 // finish the remainder of the scanline not handled by the SIMD loop.
447 for (; x < (ps_context_->width - 1); ++x) {
448 // Sum the jittered sources to construct neighbor count.
449 int count = src0[0] + src0[1] + src0[2] +
450 src1[0] + + src1[2] +
451 src2[0] + src2[1] + src2[2];
452 // Add the center cell.
453 count = count + count + src1[1];
454 // Use table lookup indexed by count to determine pixel & alive state.
455 uint32_t color = kNeighborColors[count];
456 *pixel_line++ = color;
457 *dst++ = kIsAlive[count];
458 ++src0;
459 ++src1;
460 ++src2;
461 }
462 }
463
464 // Static entry point for worker thread.
465 void Life::wSimulateEntry(int slice, void* thiz) {
466 static_cast<Life*>(thiz)->wSimulate(slice);
467 }
468
469 void Life::Simulate() {
470 // Stir up the edges to prevent the simulation from reaching steady state.
471 Stir();
472
473 if (multithread_) {
474 // If multi-threading enabled, dispatch tasks to pool of worker threads.
475 workers_->Dispatch(ps_context_->height, wSimulateEntry, this);
476 } else {
477 // Else manually simulate each line on this thread.
478 for (int y = 0; y < ps_context_->height; y++) {
479 wSimulateEntry(y, this);
480 }
481 }
482 std::swap(cell_in_, cell_out_);
483 }
484
485 void Life::Update() {
486
487 PSContext2DGetBuffer(ps_context_);
488 if (NULL == ps_context_->data)
489 return;
490
491 // If we somehow have not allocated these pointers yet, skip this frame.
492 if (!cell_in_ || !cell_out_) return;
493
494 // Simulate one (or more if benchmarking) frames
495 do {
496 Simulate();
497 if (!benchmarking_)
498 break;
499 --benchmark_frame_counter_;
500 } while(benchmark_frame_counter_ > 0);
501 if (benchmarking_)
502 EndBenchmark();
503
504 PSContext2DSwapBuffer(ps_context_);
505 }
506
507 // Starting point for the module. We do not use main since it would
508 // collide with main in libppapi_cpp.
509 int example_main(int argc, char* argv[]) {
510 Life life;
511 while (true) {
512 PSEvent* ps_event;
513 // Consume all available events
514 while ((ps_event = PSEventTryAcquire()) != NULL) {
515 life.HandleEvent(ps_event);
516 PSEventRelease(ps_event);
517 }
518 // Do simulation, render and present.
519 life.Update();
520 }
521 return 0;
522 }
523
524 // Register the function to call once the Instance Object is initialized.
525 // see: pappi_simple/ps_main.h
526 PPAPI_SIMPLE_REGISTER_MAIN(example_main);
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698