Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: native_client_sdk/src/examples/demo/life_simd/life.c

Issue 289023002: Initial SIMD demos life and earth for PNaCl. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: title Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 /* Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be 2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file. 3 * found in the LICENSE file.
4 */ 4 */
5 5
6 #include <assert.h> 6 #include <assert.h>
7 #include <math.h> 7 #include <math.h>
8 #include <stdio.h> 8 #include <stdio.h>
9 #include <stdlib.h> 9 #include <stdlib.h>
10 #include <string.h> 10 #include <string.h>
(...skipping 20 matching lines...) Expand all
31 PPB_KeyboardInputEvent* g_pKeyboardInput; 31 PPB_KeyboardInputEvent* g_pKeyboardInput;
32 PPB_MouseInputEvent* g_pMouseInput; 32 PPB_MouseInputEvent* g_pMouseInput;
33 PPB_TouchInputEvent* g_pTouchInput; 33 PPB_TouchInputEvent* g_pTouchInput;
34 34
35 struct { 35 struct {
36 PP_Resource ctx; 36 PP_Resource ctx;
37 struct PP_Size size; 37 struct PP_Size size;
38 int bound; 38 int bound;
39 uint8_t* cell_in; 39 uint8_t* cell_in;
40 uint8_t* cell_out; 40 uint8_t* cell_out;
41 int32_t cell_stride;
41 } g_Context; 42 } g_Context;
42 43
43 44
44 const unsigned int kInitialRandSeed = 0xC0DE533D; 45 const unsigned int kInitialRandSeed = 0xC0DE533D;
46 const int kCellAlignment = 0x10;
47
48 #define INLINE inline __attribute__((always_inline))
45 49
46 /* BGRA helper macro, for constructing a pixel for a BGRA buffer. */ 50 /* BGRA helper macro, for constructing a pixel for a BGRA buffer. */
47 #define MakeBGRA(b, g, r, a) \ 51 #define MakeBGRA(b, g, r, a) \
48 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) 52 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
49 53
54 /* 128 bit vector types */
55 typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
56
57 /* Helper function to broadcast x across 16 element vector. */
58 INLINE u8x16_t broadcast(uint8_t x) {
59 u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
60 return r;
61 }
62
50 63
51 /* 64 /*
52 * Given a count of cells in a 3x3 grid where cells are worth 1 except for 65 * Convert a count value into a live (green) or dead color value.
53 * the center which is worth 9, this is a color representation of how
54 * "alive" that cell is making for a more interesting representation than
55 * a binary alive or dead.
56 */ 66 */
57 const uint32_t kNeighborColors[] = { 67 const uint32_t kNeighborColors[] = {
58 MakeBGRA(0x00, 0x00, 0x00, 0xff), 68 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
59 MakeBGRA(0x00, 0x40, 0x00, 0xff), 69 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
60 MakeBGRA(0x00, 0x60, 0x00, 0xff), 70 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
61 MakeBGRA(0x00, 0x80, 0x00, 0xff), 71 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
62 MakeBGRA(0x00, 0xA0, 0x00, 0xff), 72 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
63 MakeBGRA(0x00, 0xC0, 0x00, 0xff), 73 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
64 MakeBGRA(0x00, 0xE0, 0x00, 0xff), 74 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
65 MakeBGRA(0x00, 0x00, 0x00, 0xff), 75 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
66 MakeBGRA(0x00, 0x40, 0x00, 0xff), 76 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
67 MakeBGRA(0x00, 0x60, 0x00, 0xff), 77 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
68 MakeBGRA(0x00, 0x80, 0x00, 0xff), 78 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
69 MakeBGRA(0x00, 0xA0, 0x00, 0xff), 79 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
70 MakeBGRA(0x00, 0xC0, 0x00, 0xff), 80 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
71 MakeBGRA(0x00, 0xE0, 0x00, 0xff), 81 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
72 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 82 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
73 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 83 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
74 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 84 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
75 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 85 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
76 }; 86 };
77 87
78 /* 88 /*
79 * These represent the new health value of a cell based on its neighboring 89 * These represent the new health value of a cell based on its neighboring
80 * values. The health is binary: either alive or dead. 90 * values. The health is binary: either alive or dead.
81 */ 91 */
82 const uint8_t kIsAlive[] = { 92 const uint8_t kIsAlive[] = {
83 0, 0, 0, 1, 0, 0, 0, 0, 0, /* Values if the center cell is dead. */ 93 0, 0, 0, 0, 0, 1, 1, 1, 0,
84 0, 0, 1, 1, 0, 0, 0, 0, 0 /* Values if the center cell is alive. */ 94 0, 0, 0, 0, 0, 0, 0, 0, 0
85 }; 95 };
86 96
87 void UpdateContext(uint32_t width, uint32_t height) { 97 void UpdateContext(uint32_t width, uint32_t height) {
98 int stride = (width + kCellAlignment - 1) & ~kCellAlignment;
88 if (width != g_Context.size.width || height != g_Context.size.height) { 99 if (width != g_Context.size.width || height != g_Context.size.height) {
89 size_t size = width * height; 100
101 size_t size = stride * height;
90 size_t index; 102 size_t index;
91 103
92 free(g_Context.cell_in); 104 free(g_Context.cell_in);
93 free(g_Context.cell_out); 105 free(g_Context.cell_out);
94 106
95 /* Create a new context */ 107 /* Create a new context */
96 g_Context.cell_in = (uint8_t*) malloc(size); 108 void* in_buffer = NULL;
97 g_Context.cell_out = (uint8_t*) malloc(size); 109 void* out_buffer = NULL;
110 /* alloc buffers aligned on 16 bytes */
111 posix_memalign(&in_buffer, kCellAlignment, size);
112 posix_memalign(&out_buffer, kCellAlignment, size);
113 g_Context.cell_in = (uint8_t*) in_buffer;
114 g_Context.cell_out = (uint8_t*) out_buffer;
98 115
99 memset(g_Context.cell_out, 0, size); 116 memset(g_Context.cell_out, 0, size);
100 for (index = 0; index < size; index++) { 117 for (index = 0; index < size; index++) {
101 g_Context.cell_in[index] = rand() & 1; 118 g_Context.cell_in[index] = rand() & 1;
102 } 119 }
103 } 120 }
104 121
105 /* Recreate the graphics context on a view change */ 122 /* Recreate the graphics context on a view change */
106 g_pCore->ReleaseResource(g_Context.ctx); 123 g_pCore->ReleaseResource(g_Context.ctx);
107 g_Context.size.width = width; 124 g_Context.size.width = width;
108 g_Context.size.height = height; 125 g_Context.size.height = height;
126 g_Context.cell_stride = stride;
109 g_Context.ctx = 127 g_Context.ctx =
110 g_pGraphics2D->Create(PSGetInstanceId(), &g_Context.size, PP_TRUE); 128 g_pGraphics2D->Create(PSGetInstanceId(), &g_Context.size, PP_TRUE);
111 g_Context.bound = 129 g_Context.bound =
112 g_pInstance->BindGraphics(PSGetInstanceId(), g_Context.ctx); 130 g_pInstance->BindGraphics(PSGetInstanceId(), g_Context.ctx);
113 } 131 }
114 132
115 void DrawCell(int32_t x, int32_t y) { 133 void DrawCell(int32_t x, int32_t y) {
116 int32_t width = g_Context.size.width; 134 int32_t width = g_Context.size.width;
117 int32_t height = g_Context.size.height; 135 int32_t height = g_Context.size.height;
136 int32_t stride = g_Context.cell_stride;
118 137
119 if (!g_Context.cell_in) return; 138 if (!g_Context.cell_in) return;
120 139
121 if (x > 0 && x < width - 1 && y > 0 && y < height - 1) { 140 if (x > 0 && x < width - 1 && y > 0 && y < height - 1) {
122 g_Context.cell_in[x - 1 + y * width] = 1; 141 g_Context.cell_in[x - 1 + y * stride] = 1;
123 g_Context.cell_in[x + 1 + y * width] = 1; 142 g_Context.cell_in[x + 1 + y * stride] = 1;
124 g_Context.cell_in[x + (y - 1) * width] = 1; 143 g_Context.cell_in[x + (y - 1) * stride] = 1;
125 g_Context.cell_in[x + (y + 1) * width] = 1; 144 g_Context.cell_in[x + (y + 1) * stride] = 1;
126 } 145 }
127 } 146 }
128 147
129 void ProcessTouchEvent(PSEvent* event) { 148 void ProcessTouchEvent(PSEvent* event) {
130 uint32_t count = g_pTouchInput->GetTouchCount(event->as_resource, 149 uint32_t count = g_pTouchInput->GetTouchCount(event->as_resource,
131 PP_TOUCHLIST_TYPE_TOUCHES); 150 PP_TOUCHLIST_TYPE_TOUCHES);
132 uint32_t i, j; 151 uint32_t i, j;
133 for (i = 0; i < count; i++) { 152 for (i = 0; i < count; i++) {
134 struct PP_TouchPoint touch = g_pTouchInput->GetTouchByIndex( 153 struct PP_TouchPoint touch = g_pTouchInput->GetTouchByIndex(
135 event->as_resource, PP_TOUCHLIST_TYPE_TOUCHES, i); 154 event->as_resource, PP_TOUCHLIST_TYPE_TOUCHES, i);
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 /* case PSE_INSTANCE_HANDLEINPUT */ 213 /* case PSE_INSTANCE_HANDLEINPUT */
195 break; 214 break;
196 } 215 }
197 216
198 default: 217 default:
199 break; 218 break;
200 } 219 }
201 } 220 }
202 221
203 222
204 void Stir(uint32_t width, uint32_t height) { 223 void Stir() {
205 int i; 224 int32_t width = g_Context.size.width;
225 int32_t height = g_Context.size.height;
226 int32_t stride = g_Context.cell_stride;
227 int32_t i;
206 if (g_Context.cell_in == NULL || g_Context.cell_out == NULL) 228 if (g_Context.cell_in == NULL || g_Context.cell_out == NULL)
207 return; 229 return;
208 230
209 for (i = 0; i < width; ++i) { 231 for (i = 0; i < width; ++i) {
210 g_Context.cell_in[i] = rand() & 1; 232 g_Context.cell_in[i] = rand() & 1;
211 g_Context.cell_in[i + (height - 1) * width] = rand() & 1; 233 g_Context.cell_in[i + (height - 1) * stride] = rand() & 1;
212 } 234 }
213 for (i = 0; i < height; ++i) { 235 for (i = 0; i < height; ++i) {
214 g_Context.cell_in[i * width] = rand() & 1; 236 g_Context.cell_in[i * stride] = rand() & 1;
215 g_Context.cell_in[i * width + (width - 1)] = rand() & 1; 237 g_Context.cell_in[i * stride + (width - 1)] = rand() & 1;
216 } 238 }
217 } 239 }
218 240
241
219 void Render() { 242 void Render() {
220 struct PP_Size* psize = &g_Context.size; 243 struct PP_Size* psize = &g_Context.size;
221 PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL; 244 PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL;
222 245
223 /* 246 /*
224 * Create a buffer to draw into. Since we are waiting until the next flush 247 * Create a buffer to draw into. Since we are waiting until the next flush
225 * chrome has an opportunity to cache this buffer see ppb_graphics_2d.h. 248 * chrome has an opportunity to cache this buffer see ppb_graphics_2d.h.
226 */ 249 */
227 PP_Resource image = 250 PP_Resource image =
228 g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE); 251 g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE);
229 uint8_t* pixels = g_pImageData->Map(image); 252 uint8_t* pixels = g_pImageData->Map(image);
230 253
231 struct PP_ImageDataDesc desc; 254 struct PP_ImageDataDesc desc;
232 uint8_t* cell_temp; 255 uint8_t* cell_temp;
233 uint32_t x, y; 256 uint32_t x, y;
234 257
235 /* If we somehow have not allocated these pointers yet, skip this frame. */ 258 /* If we somehow have not allocated these pointers yet, skip this frame. */
236 if (!g_Context.cell_in || !g_Context.cell_out) return; 259 if (!g_Context.cell_in || !g_Context.cell_out) return;
237 260
238 /* Get the stride. */ 261 /* Get the pixel stride. */
239 g_pImageData->Describe(image, &desc); 262 g_pImageData->Describe(image, &desc);
240 263
241 /* Stir up the edges to prevent the simulation from reaching steady state. */ 264 /* Stir up the edges to prevent the simulation from reaching steady state. */
242 Stir(desc.size.width, desc.size.height); 265 Stir();
243 266
244 /* Do neighbor summation; apply rules, output pixel color. */ 267 /*
245 for (y = 1; y < desc.size.height - 1; ++y) { 268 * Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
246 uint8_t *src0 = (g_Context.cell_in + (y - 1) * desc.size.width) + 1; 269 * wide perimeter is excluded from the simulation update; only cells from
247 uint8_t *src1 = src0 + desc.size.width; 270 * x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
248 uint8_t *src2 = src1 + desc.size.width; 271 */
249 int count; 272
250 uint32_t color; 273 for (y = 1; y < g_Context.size.height - 1; ++y) {
251 uint8_t *dst = (g_Context.cell_out + y * desc.size.width) + 1; 274 uint8_t *src0 = (g_Context.cell_in + (y - 1) * g_Context.cell_stride);
275 uint8_t *src1 = src0 + g_Context.cell_stride;
276 uint8_t *src2 = src1 + g_Context.cell_stride;
277 uint8_t *dst = (g_Context.cell_out + y * g_Context.cell_stride) + 1;
252 uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride); 278 uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride);
279 const u8x16_t kOne = broadcast(1);
280 const u8x16_t kFour = broadcast(4);
281 const u8x16_t kEight = broadcast(8);
282 const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
253 283
254 for (x = 1; x < (desc.size.width - 1); ++x) { 284 /* Prime the src */
255 /* Build sum, weight center by 9x. */ 285 u8x16_t src00 = *(u8x16_t*)&src0[0];
256 count = src0[-1] + src0[0] + src0[1] + 286 u8x16_t src01 = *(u8x16_t*)&src0[16];
257 src1[-1] + src1[0] * 9 + src1[1] + 287 u8x16_t src10 = *(u8x16_t*)&src1[0];
258 src2[-1] + src2[0] + src2[1]; 288 u8x16_t src11 = *(u8x16_t*)&src1[16];
259 color = kNeighborColors[count]; 289 u8x16_t src20 = *(u8x16_t*)&src2[0];
290 u8x16_t src21 = *(u8x16_t*)&src2[16];
260 291
292 /* This inner loop is SIMD - each loop iteration will process 16 cells. */
293 for (x = 1; (x + 15) < (g_Context.size.width - 1); x += 16) {
294
295 /*
296 * Construct jittered source temps, using __builtin_shufflevector(..) to
297 * extract a shifted 16 element vector from the 32 element concatenation
298 * of two source vectors.
299 */
300 u8x16_t src0j0 = src00;
301 u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
302 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
303 u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
304 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
305 u8x16_t src1j0 = src10;
306 u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
307 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
308 u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
309 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
310 u8x16_t src2j0 = src20;
311 u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
312 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
313 u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
314 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
315
316 /* Sum the jittered sources to construct neighbor count. */
317 u8x16_t count = src0j0 + src0j1 + src0j2 +
318 src1j0 + + src1j2 +
319 src2j0 + src2j1 + src2j2;
320 /* Add the center cell. */
321 count = count + count + src1j1;
322 /* If count > 4 and < 8, center cell will be alive in the next frame. */
323 u8x16_t alive1 = count > kFour;
324 u8x16_t alive2 = count < kEight;
325 /* Intersect the two comparisons from above. */
326 u8x16_t alive = alive1 & alive2;
327
328 /*
329 * At this point, alive[x] will be one of two values:
330 * 0x00 for a dead cell
331 * 0xFF for an alive cell.
332 *
333 * Next, convert alive cells to green pixel color.
334 * Use __builtin_shufflevector(..) to construct output pixels from
335 * concatenation of alive vector and kZero255 const vector.
336 * Indices 0..15 select the 16 cells from alive vector.
337 * Index 16 is zero constant from kZero255 constant vector.
338 * Index 17 is 255 constant from kZero255 constant vector.
339 * Output pixel color values are in BGRABGRABGRABGRA order.
340 * Since each pixel needs 4 bytes of color information, 16 cells will
341 * need to expand to 4 separate 16 byte pixel splats.
342 */
343 u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
344 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
345 u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
346 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
347 u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
348 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
349 u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
350 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
351
352 /* Write 16 pixels to output pixel buffer. */
353 *(u8x16_t*)(pixel_line + 0) = pixel0_3;
354 *(u8x16_t*)(pixel_line + 4) = pixel4_7;
355 *(u8x16_t*)(pixel_line + 8) = pixel8_11;
356 *(u8x16_t*)(pixel_line + 12) = pixel12_15;
357
358 /* Convert alive mask to 1 or 0 and store in destination cell array. */
359 *(u8x16_t*)dst = alive & kOne;
360
361 /* Increment pointers. */
362 pixel_line += 16;
363 dst += 16;
364 src0 += 16;
365 src1 += 16;
366 src2 += 16;
367
368 /* Shift source over by 16 cells and read the next 16 cells. */
369 src00 = src01;
370 src01 = *(u8x16_t*)&src0[16];
371 src10 = src11;
372 src11 = *(u8x16_t*)&src1[16];
373 src20 = src21;
374 src21 = *(u8x16_t*)&src2[16];
375 }
376
377 /*
378 * The SIMD loop above does 16 cells at a time. The loop below is the
379 * regular version which processes one cell at a time. It is used to
380 * finish the remainder of the scanline not handled by the SIMD loop.
381 */
382 for (; x < (g_Context.size.width - 1); ++x) {
383 /* Sum the jittered sources to construct neighbor count. */
384 int count = src0[0] + src0[1] + src0[2] +
385 src1[0] + + src1[2] +
386 src2[0] + src2[1] + src2[2];
387 /* Add the center cell. */
388 count = count + count + src1[1];
389 /* Use table lookup indexed by count to determine pixel & alive state. */
390 uint32_t color = kNeighborColors[count];
261 *pixel_line++ = color; 391 *pixel_line++ = color;
262 *dst++ = kIsAlive[count]; 392 *dst++ = kIsAlive[count];
263 ++src0; 393 ++src0;
264 ++src1; 394 ++src1;
265 ++src2; 395 ++src2;
266 } 396 }
267 } 397 }
268 398
269 cell_temp = g_Context.cell_in; 399 cell_temp = g_Context.cell_in;
270 g_Context.cell_in = g_Context.cell_out; 400 g_Context.cell_in = g_Context.cell_out;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
318 } 448 }
319 } 449 }
320 return 0; 450 return 0;
321 } 451 }
322 452
323 /* 453 /*
324 * Register the function to call once the Instance Object is initialized. 454 * Register the function to call once the Instance Object is initialized.
325 * see: ppapi_simple/ps_main.h 455 * see: ppapi_simple/ps_main.h
326 */ 456 */
327 PPAPI_SIMPLE_REGISTER_MAIN(example_main); 457 PPAPI_SIMPLE_REGISTER_MAIN(example_main);
OLDNEW
« no previous file with comments | « native_client_sdk/src/examples/demo/life_simd/index.html ('k') | native_client_sdk/src/examples/demo/voronoi/example.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698