Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: native_client_sdk/src/examples/demo/life_simd/life.cc

Issue 451883002: Update SIMD version of life demo. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: remove commented out js Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « native_client_sdk/src/examples/demo/life_simd/life.c ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <assert.h>
6 #include <math.h>
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/time.h>
12 #include <unistd.h>
13
14 #include <ppapi/c/ppb_input_event.h>
15 #include <ppapi/cpp/fullscreen.h>
16 #include <ppapi/cpp/input_event.h>
17 #include <ppapi/cpp/var.h>
18 #include <ppapi/cpp/var_array.h>
19 #include <ppapi/cpp/var_array_buffer.h>
20 #include <ppapi/cpp/var_dictionary.h>
21
22 #include "ppapi_simple/ps.h"
23 #include "ppapi_simple/ps_context_2d.h"
24 #include "ppapi_simple/ps_event.h"
25 #include "ppapi_simple/ps_instance.h"
26 #include "ppapi_simple/ps_interface.h"
27 #include "ppapi_simple/ps_main.h"
28 #include "sdk_util/macros.h"
29 #include "sdk_util/thread_pool.h"
30
31 using namespace sdk_util; // For sdk_util::ThreadPool
32
33 namespace {
34
35 #define INLINE inline __attribute__((always_inline))
36
37 // BGRA helper macro, for constructing a pixel for a BGRA buffer.
38 #define MakeBGRA(b, g, r, a) \
39 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
40
41 const int kFramesToBenchmark = 100;
42 const int kCellAlignment = 0x10;
43
44 // 128 bit vector types
45 typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
46
47 // Helper function to broadcast x across 16 element vector.
48 INLINE u8x16_t broadcast(uint8_t x) {
49 u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
50 return r;
51 }
52
53 // Convert a count value into a live (green) or dead color value.
54 const uint32_t kNeighborColors[] = {
55 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
56 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
57 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
58 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
59 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
60 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
61 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
62 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
63 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
64 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
65 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
66 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
67 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
68 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
69 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
70 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
71 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
72 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
73 };
74
75 // These represent the new health value of a cell based on its neighboring
76 // values. The health is binary: either alive or dead.
77 const uint8_t kIsAlive[] = {
78 0, 0, 0, 0, 0, 1, 1, 1, 0,
79 0, 0, 0, 0, 0, 0, 0, 0, 0
80 };
81
82 // Timer helper for benchmarking. Returns seconds elapsed since program start,
83 // as a double.
84 timeval start_tv;
85 int start_tv_retv = gettimeofday(&start_tv, NULL);
86
87 inline double getseconds() {
88 const double usec_to_sec = 0.000001;
89 timeval tv;
90 if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL)))
91 return (tv.tv_sec - start_tv.tv_sec) + tv.tv_usec * usec_to_sec;
92 return 0.0;
93 }
94 } // namespace
95
96
97 class Life {
98 public:
99 Life();
100 virtual ~Life();
101 // Runs a tick of the simulations, update 2D output.
102 void Update();
103 // Handle event from user, or message from JS.
104 void HandleEvent(PSEvent* ps_event);
105 private:
106 void UpdateContext();
107 void DrawCell(int32_t x, int32_t y);
108 void ProcessTouchEvent(const pp::TouchInputEvent& touches);
109 void PostUpdateMessage(const char* message, double value);
110 void StartBenchmark();
111 void EndBenchmark();
112 void Stir();
113 void wSimulate(int y);
114 static void wSimulateEntry(int y, void* data);
115 void Simulate();
116
117 bool simd_;
118 bool multithread_;
119 bool benchmarking_;
120 int benchmark_frame_counter_;
121 double bench_start_time_;
122 double bench_end_time_;
123 uint8_t* cell_in_;
124 uint8_t* cell_out_;
125 int32_t cell_stride_;
126 int32_t width_;
127 int32_t height_;
128 PSContext2D_t* ps_context_;
129 ThreadPool* workers_;
130 };
131
132 Life::Life() :
133 simd_(true),
134 multithread_(true),
135 benchmarking_(false),
136 benchmark_frame_counter_(0),
137 bench_start_time_(0.0),
138 bench_end_time_(0.0),
139 cell_in_(NULL),
140 cell_out_(NULL),
141 cell_stride_(0),
142 width_(0),
143 height_(0) {
144 ps_context_ = PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL);
145 // Query system for number of processors via sysconf()
146 int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
147 if (num_threads < 2)
148 num_threads = 2;
149 workers_ = new ThreadPool(num_threads);
150 PSEventSetFilter(PSE_ALL);
151 }
152
153 Life::~Life() {
154 delete workers_;
155 PSContext2DFree(ps_context_);
156 }
157
158 void Life::UpdateContext() {
159 cell_stride_ = (ps_context_->width + kCellAlignment - 1) &
160 ~(kCellAlignment - 1);
161 size_t size = cell_stride_ * ps_context_->height;
162
163 if (ps_context_->width != width_ || ps_context_->height != height_) {
164 free(cell_in_);
165 free(cell_out_);
166
167 // Create a new context
168 void* in_buffer = NULL;
169 void* out_buffer = NULL;
170 // alloc buffers aligned on 16 bytes
171 posix_memalign(&in_buffer, kCellAlignment, size);
172 posix_memalign(&out_buffer, kCellAlignment, size);
173 cell_in_ = (uint8_t*) in_buffer;
174 cell_out_ = (uint8_t*) out_buffer;
175
176 memset(cell_out_, 0, size);
177 for (size_t index = 0; index < size; index++) {
178 cell_in_[index] = rand() & 1;
179 }
180 width_ = ps_context_->width;
181 height_ = ps_context_->height;
182 }
183 }
184
185 void Life::DrawCell(int32_t x, int32_t y) {
186 if (!cell_in_) return;
187 if (x > 0 && x < ps_context_->width - 1 &&
188 y > 0 && y < ps_context_->height - 1) {
189 cell_in_[x - 1 + y * cell_stride_] = 1;
190 cell_in_[x + 1 + y * cell_stride_] = 1;
191 cell_in_[x + (y - 1) * cell_stride_] = 1;
192 cell_in_[x + (y + 1) * cell_stride_] = 1;
193 }
194 }
195
196 void Life::ProcessTouchEvent(const pp::TouchInputEvent& touches) {
197 uint32_t count = touches.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES);
198 uint32_t i, j;
199 for (i = 0; i < count; i++) {
200 pp::TouchPoint touch =
201 touches.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES, i);
202 int radius = (int)(touch.radii().x());
203 int x = (int)(touch.position().x());
204 int y = (int)(touch.position().y());
205 // num = 1/100th the area of touch point
206 uint32_t num = (uint32_t)(M_PI * radius * radius / 100.0f);
207 for (j = 0; j < num; j++) {
208 int dx = rand() % (radius * 2) - radius;
209 int dy = rand() % (radius * 2) - radius;
210 // only plot random cells within the touch area
211 if (dx * dx + dy * dy <= radius * radius)
212 DrawCell(x + dx, y + dy);
213 }
214 }
215 }
216
217 void Life::PostUpdateMessage(const char* message_name, double value) {
218 pp::VarDictionary message;
219 message.Set("message", message_name);
220 message.Set("value", value);
221 PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message.pp_var());
222 }
223
224 void Life::StartBenchmark() {
225 printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n",
226 simd_ ? "enabled" : "disabled",
227 multithread_ ? "enabled" : "disabled",
228 ps_context_->width,
229 ps_context_->height);
230 benchmarking_ = true;
231 bench_start_time_ = getseconds();
232 benchmark_frame_counter_ = kFramesToBenchmark;
233 }
234
235 void Life::EndBenchmark() {
236 double total_time;
237 bench_end_time_ = getseconds();
238 benchmarking_ = false;
239 total_time = bench_end_time_ - bench_start_time_;
240 printf("Finished - benchmark took %f seconds\n", total_time);
241 // Send benchmark result to JS.
242 PostUpdateMessage("benchmark_result", total_time);
243 }
244
245 void Life::HandleEvent(PSEvent* ps_event) {
246 // Give the 2D context a chance to process the event.
247 if (0 != PSContext2DHandleEvent(ps_context_, ps_event)) {
248 UpdateContext();
249 return;
250 }
251
252 switch(ps_event->type) {
253
254 case PSE_INSTANCE_HANDLEINPUT: {
255 pp::InputEvent event(ps_event->as_resource);
256
257 switch(event.GetType()) {
258 case PP_INPUTEVENT_TYPE_MOUSEDOWN:
259 case PP_INPUTEVENT_TYPE_MOUSEMOVE: {
260 pp::MouseInputEvent mouse = pp::MouseInputEvent(event);
261 // If the button is down, draw
262 if (mouse.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) {
263 PP_Point location = mouse.GetPosition();
264 DrawCell(location.x, location.y);
265 }
266 break;
267 }
268
269 case PP_INPUTEVENT_TYPE_TOUCHSTART:
270 case PP_INPUTEVENT_TYPE_TOUCHMOVE: {
271 pp::TouchInputEvent touches = pp::TouchInputEvent(event);
272 ProcessTouchEvent(touches);
273 break;
274 }
275
276 case PP_INPUTEVENT_TYPE_KEYDOWN: {
277 pp::Fullscreen fullscreen(PSInstance::GetInstance());
278 bool isFullscreen = fullscreen.IsFullscreen();
279 fullscreen.SetFullscreen(!isFullscreen);
280 break;
281 }
282
283 default:
284 break;
285 }
286 break; // case PSE_INSTANCE_HANDLEINPUT
287 }
288
289 case PSE_INSTANCE_HANDLEMESSAGE: {
290 // Convert Pepper Simple message to PPAPI C++ vars
291 pp::Var var(ps_event->as_var);
292 if (var.is_dictionary()) {
293 pp::VarDictionary dictionary(var);
294 std::string message = dictionary.Get("message").AsString();
295 if (message == "run_benchmark" && !benchmarking_) {
296 StartBenchmark();
297 } else if (message == "set_simd") {
298 simd_ = dictionary.Get("value").AsBool();
299 } else if (message == "set_threading") {
300 multithread_ = dictionary.Get("value").AsBool();
301 }
302 }
303 break; // case PSE_INSTANCE_HANDLEMESSAGE
304 }
305
306 default:
307 break;
308 }
309 }
310
311 void Life::Stir() {
312 int32_t width = ps_context_->width;
313 int32_t height = ps_context_->height;
314 int32_t stride = cell_stride_;
315 int32_t i;
316 if (cell_in_ == NULL || cell_out_ == NULL)
317 return;
318
319 for (i = 0; i < width; ++i) {
320 cell_in_[i] = rand() & 1;
321 cell_in_[i + (height - 1) * stride] = rand() & 1;
322 }
323 for (i = 0; i < height; ++i) {
324 cell_in_[i * stride] = rand() & 1;
325 cell_in_[i * stride + (width - 1)] = rand() & 1;
326 }
327 }
328
329 void Life::wSimulate(int y) {
330 // Don't run simulation on top and bottom borders
331 if (y < 1 || y >= ps_context_->height - 1)
332 return;
333
334 // Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
335 // wide perimeter is excluded from the simulation update; only cells from
336 // x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
337 uint8_t *src0 = (cell_in_ + (y - 1) * cell_stride_);
338 uint8_t *src1 = src0 + cell_stride_;
339 uint8_t *src2 = src1 + cell_stride_;
340 uint8_t *dst = (cell_out_ + y * cell_stride_) + 1;
341 uint32_t *pixels = static_cast<uint32_t *>(ps_context_->data);
342 uint32_t *pixel_line = // static_cast<uint32_t*>
343 (pixels + y * ps_context_->stride / sizeof(uint32_t));
344 int32_t x = 1;
345
346 if (simd_) {
347 const u8x16_t kOne = broadcast(1);
348 const u8x16_t kFour = broadcast(4);
349 const u8x16_t kEight = broadcast(8);
350 const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
351
352 // Prime the src
353 u8x16_t src00 = *reinterpret_cast<u8x16_t*>(&src0[0]);
354 u8x16_t src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
355 u8x16_t src10 = *reinterpret_cast<u8x16_t*>(&src1[0]);
356 u8x16_t src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
357 u8x16_t src20 = *reinterpret_cast<u8x16_t*>(&src2[0]);
358 u8x16_t src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
359
360 // This inner loop is SIMD - each loop iteration will process 16 cells.
361 for (; (x + 15) < (ps_context_->width - 1); x += 16) {
362
363 // Construct jittered source temps, using __builtin_shufflevector(..) to
364 // extract a shifted 16 element vector from the 32 element concatenation
365 // of two source vectors.
366 u8x16_t src0j0 = src00;
367 u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
368 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
369 u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
370 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
371 u8x16_t src1j0 = src10;
372 u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
373 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
374 u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
375 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
376 u8x16_t src2j0 = src20;
377 u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
378 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
379 u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
380 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
381
382 // Sum the jittered sources to construct neighbor count.
383 u8x16_t count = src0j0 + src0j1 + src0j2 +
384 src1j0 + + src1j2 +
385 src2j0 + src2j1 + src2j2;
386 // Add the center cell.
387 count = count + count + src1j1;
388 // If count > 4 and < 8, center cell will be alive in the next frame.
389 u8x16_t alive1 = count > kFour;
390 u8x16_t alive2 = count < kEight;
391 // Intersect the two comparisons from above.
392 u8x16_t alive = alive1 & alive2;
393
394 // At this point, alive[x] will be one of two values:
395 // 0x00 for a dead cell
396 // 0xFF for an alive cell.
397 //
398 // Next, convert alive cells to green pixel color.
399 // Use __builtin_shufflevector(..) to construct output pixels from
400 // concantination of alive vector and kZero255 const vector.
401 // Indices 0..15 select the 16 cells from alive vector.
402 // Index 16 is zero constant from kZero255 constant vector.
403 // Index 17 is 255 constant from kZero255 constant vector.
404 // Output pixel color values are in BGRABGRABGRABGRA order.
405 // Since each pixel needs 4 bytes of color information, 16 cells will
406 // need to expand to 4 seperate 16 byte pixel splats.
407 u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
408 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
409 u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
410 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
411 u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
412 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
413 u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
414 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
415
416 // Write 16 pixels to output pixel buffer.
417 *reinterpret_cast<u8x16_t*>(pixel_line + 0) = pixel0_3;
418 *reinterpret_cast<u8x16_t*>(pixel_line + 4) = pixel4_7;
419 *reinterpret_cast<u8x16_t*>(pixel_line + 8) = pixel8_11;
420 *reinterpret_cast<u8x16_t*>(pixel_line + 12) = pixel12_15;
421
422 // Convert alive mask to 1 or 0 and store in destination cell array.
423 *reinterpret_cast<u8x16_t*>(dst) = alive & kOne;
424
425 // Increment pointers.
426 pixel_line += 16;
427 dst += 16;
428 src0 += 16;
429 src1 += 16;
430 src2 += 16;
431
432 // Shift source over by 16 cells and read the next 16 cells.
433 src00 = src01;
434 src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
435 src10 = src11;
436 src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
437 src20 = src21;
438 src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
439 }
440 }
441
442 // The SIMD loop above does 16 cells at a time. The loop below is the
443 // regular version which processes one cell at a time. It is used to
444 // finish the remainder of the scanline not handled by the SIMD loop.
445 for (; x < (ps_context_->width - 1); ++x) {
446 // Sum the jittered sources to construct neighbor count.
447 int count = src0[0] + src0[1] + src0[2] +
448 src1[0] + + src1[2] +
449 src2[0] + src2[1] + src2[2];
450 // Add the center cell.
451 count = count + count + src1[1];
452 // Use table lookup indexed by count to determine pixel & alive state.
453 uint32_t color = kNeighborColors[count];
454 *pixel_line++ = color;
455 *dst++ = kIsAlive[count];
456 ++src0;
457 ++src1;
458 ++src2;
459 }
460 }
461
462 // Static entry point for worker thread.
463 void Life::wSimulateEntry(int slice, void* thiz) {
464 static_cast<Life*>(thiz)->wSimulate(slice);
465 }
466
467 void Life::Simulate() {
468 // Stir up the edges to prevent the simulation from reaching steady state.
469 Stir();
470
471 if (multithread_) {
472 // If multi-threading enabled, dispatch tasks to pool of worker threads.
473 workers_->Dispatch(ps_context_->height, wSimulateEntry, this);
474 } else {
475 // Else manually simulate each line on this thread.
476 for (int y = 0; y < ps_context_->height; y++) {
477 wSimulateEntry(y, this);
478 }
479 }
480 std::swap(cell_in_, cell_out_);
481 }
482
483 void Life::Update() {
484
485 PSContext2DGetBuffer(ps_context_);
486 if (NULL == ps_context_->data)
487 return;
488
489 // If we somehow have not allocated these pointers yet, skip this frame.
490 if (!cell_in_ || !cell_out_) return;
491
492 // Simulate one (or more if benchmarking) frames
493 do {
494 Simulate();
495 if (!benchmarking_)
496 break;
497 --benchmark_frame_counter_;
498 } while(benchmark_frame_counter_ > 0);
499 if (benchmarking_)
500 EndBenchmark();
501
502 PSContext2DSwapBuffer(ps_context_);
503 }
504
505 // Starting point for the module. We do not use main since it would
506 // collide with main in libppapi_cpp.
507 int example_main(int argc, char* argv[]) {
508 Life life;
509 while (true) {
510 PSEvent* ps_event;
511 // Consume all available events
512 while ((ps_event = PSEventTryAcquire()) != NULL) {
513 life.HandleEvent(ps_event);
514 PSEventRelease(ps_event);
515 }
516 // Do simulation, render and present.
517 life.Update();
518 }
519 return 0;
520 }
521
522 // Register the function to call once the Instance Object is initialized.
523 // see: pappi_simple/ps_main.h
524 PPAPI_SIMPLE_REGISTER_MAIN(example_main);
OLDNEW
« no previous file with comments | « native_client_sdk/src/examples/demo/life_simd/life.c ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698