Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(38)

Side by Side Diff: native_client_sdk/src/examples/demo/life_simd/life.c

Issue 289023002: Initial SIMD demos life and earth for PNaCl. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 /* Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be 2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file. 3 * found in the LICENSE file.
4 */ 4 */
5 5
6 #include <assert.h> 6 #include <assert.h>
7 #include <math.h> 7 #include <math.h>
8 #include <stdio.h> 8 #include <stdio.h>
9 #include <stdlib.h> 9 #include <stdlib.h>
10 #include <string.h> 10 #include <string.h>
(...skipping 25 matching lines...) Expand all
36 PP_Resource ctx; 36 PP_Resource ctx;
37 struct PP_Size size; 37 struct PP_Size size;
38 int bound; 38 int bound;
39 uint8_t* cell_in; 39 uint8_t* cell_in;
40 uint8_t* cell_out; 40 uint8_t* cell_out;
41 } g_Context; 41 } g_Context;
42 42
43 43
44 const unsigned int kInitialRandSeed = 0xC0DE533D; 44 const unsigned int kInitialRandSeed = 0xC0DE533D;
45 45
46 #define INLINE inline __attribute__((always_inline))
47
46 /* BGRA helper macro, for constructing a pixel for a BGRA buffer. */ 48 /* BGRA helper macro, for constructing a pixel for a BGRA buffer. */
47 #define MakeBGRA(b, g, r, a) \ 49 #define MakeBGRA(b, g, r, a) \
48 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) 50 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
49 51
52 /* 128 bit vector types */
53 typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
54
55 /* Helper function to broadcast x across 16 element vector. */
56 INLINE u8x16_t broadcast(uint8_t x) {
57 u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
58 return r;
59 }
60
50 61
51 /* 62 /*
52 * Given a count of cells in a 3x3 grid where cells are worth 1 except for 63 * Convert a count value into a live (green) or dead color value.
53 * the center which is worth 9, this is a color representation of how
54 * "alive" that cell is making for a more interesting representation than
55 * a binary alive or dead.
56 */ 64 */
57 const uint32_t kNeighborColors[] = { 65 const uint32_t kNeighborColors[] = {
58 MakeBGRA(0x00, 0x00, 0x00, 0xff), 66 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
59 MakeBGRA(0x00, 0x40, 0x00, 0xff), 67 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
60 MakeBGRA(0x00, 0x60, 0x00, 0xff), 68 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
61 MakeBGRA(0x00, 0x80, 0x00, 0xff), 69 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
62 MakeBGRA(0x00, 0xA0, 0x00, 0xff), 70 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
63 MakeBGRA(0x00, 0xC0, 0x00, 0xff), 71 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
64 MakeBGRA(0x00, 0xE0, 0x00, 0xff), 72 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
65 MakeBGRA(0x00, 0x00, 0x00, 0xff), 73 MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
66 MakeBGRA(0x00, 0x40, 0x00, 0xff), 74 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
67 MakeBGRA(0x00, 0x60, 0x00, 0xff), 75 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
68 MakeBGRA(0x00, 0x80, 0x00, 0xff), 76 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
69 MakeBGRA(0x00, 0xA0, 0x00, 0xff), 77 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
70 MakeBGRA(0x00, 0xC0, 0x00, 0xff), 78 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
71 MakeBGRA(0x00, 0xE0, 0x00, 0xff), 79 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
72 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 80 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
73 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 81 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
74 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 82 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
75 MakeBGRA(0x00, 0xFF, 0x00, 0xff), 83 MakeBGRA(0x00, 0x00, 0x00, 0xFF),
76 }; 84 };
77 85
78 /* 86 /*
79 * These represent the new health value of a cell based on its neighboring 87 * These represent the new health value of a cell based on its neighboring
80 * values. The health is binary: either alive or dead. 88 * values. The health is binary: either alive or dead.
81 */ 89 */
82 const uint8_t kIsAlive[] = { 90 const uint8_t kIsAlive[] = {
83 0, 0, 0, 1, 0, 0, 0, 0, 0, /* Values if the center cell is dead. */ 91 0, 0, 0, 0, 0, 1, 1, 1, 0,
84 0, 0, 1, 1, 0, 0, 0, 0, 0 /* Values if the center cell is alive. */ 92 0, 0, 0, 0, 0, 0, 0, 0, 0
85 }; 93 };
86 94
87 void UpdateContext(uint32_t width, uint32_t height) { 95 void UpdateContext(uint32_t width, uint32_t height) {
88 if (width != g_Context.size.width || height != g_Context.size.height) { 96 if (width != g_Context.size.width || height != g_Context.size.height) {
89 size_t size = width * height; 97 size_t size = width * height;
90 size_t index; 98 size_t index;
91 99
92 free(g_Context.cell_in); 100 free(g_Context.cell_in);
93 free(g_Context.cell_out); 101 free(g_Context.cell_out);
94 102
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
209 for (i = 0; i < width; ++i) { 217 for (i = 0; i < width; ++i) {
210 g_Context.cell_in[i] = rand() & 1; 218 g_Context.cell_in[i] = rand() & 1;
211 g_Context.cell_in[i + (height - 1) * width] = rand() & 1; 219 g_Context.cell_in[i + (height - 1) * width] = rand() & 1;
212 } 220 }
213 for (i = 0; i < height; ++i) { 221 for (i = 0; i < height; ++i) {
214 g_Context.cell_in[i * width] = rand() & 1; 222 g_Context.cell_in[i * width] = rand() & 1;
215 g_Context.cell_in[i * width + (width - 1)] = rand() & 1; 223 g_Context.cell_in[i * width + (width - 1)] = rand() & 1;
216 } 224 }
217 } 225 }
218 226
219 void Render() { 227 #define NOINLINE __attribute__((noinline))
binji 2014/05/15 18:58:37 needed?
nfullagar 2014/05/15 21:25:13 Done.
228
229 NOINLINE void Render() {
220 struct PP_Size* psize = &g_Context.size; 230 struct PP_Size* psize = &g_Context.size;
221 PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL; 231 PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL;
222 232
223 /* 233 /*
224 * Create a buffer to draw into. Since we are waiting until the next flush 234 * Create a buffer to draw into. Since we are waiting until the next flush
225 * chrome has an opportunity to cache this buffer see ppb_graphics_2d.h. 235 * chrome has an opportunity to cache this buffer see ppb_graphics_2d.h.
226 */ 236 */
227 PP_Resource image = 237 PP_Resource image =
228 g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE); 238 g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE);
229 uint8_t* pixels = g_pImageData->Map(image); 239 uint8_t* pixels = g_pImageData->Map(image);
230 240
231 struct PP_ImageDataDesc desc; 241 struct PP_ImageDataDesc desc;
232 uint8_t* cell_temp; 242 uint8_t* cell_temp;
233 uint32_t x, y; 243 uint32_t x, y;
234 244
235 /* If we somehow have not allocated these pointers yet, skip this frame. */ 245 /* If we somehow have not allocated these pointers yet, skip this frame. */
236 if (!g_Context.cell_in || !g_Context.cell_out) return; 246 if (!g_Context.cell_in || !g_Context.cell_out) return;
237 247
238 /* Get the stride. */ 248 /* Get the stride. */
239 g_pImageData->Describe(image, &desc); 249 g_pImageData->Describe(image, &desc);
240 250
241 /* Stir up the edges to prevent the simulation from reaching steady state. */ 251 /* Stir up the edges to prevent the simulation from reaching steady state. */
242 Stir(desc.size.width, desc.size.height); 252 Stir(desc.size.width, desc.size.height);
243 253
244 /* Do neighbor summation; apply rules, output pixel color. */ 254 /*
255 * Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
256 * wide perimeter is excluded from the simulation update; only cells from
257 * x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
258 */
259
245 for (y = 1; y < desc.size.height - 1; ++y) { 260 for (y = 1; y < desc.size.height - 1; ++y) {
246 uint8_t *src0 = (g_Context.cell_in + (y - 1) * desc.size.width) + 1; 261 uint8_t *src0 = (g_Context.cell_in + (y - 1) * desc.size.width);
247 uint8_t *src1 = src0 + desc.size.width; 262 uint8_t *src1 = src0 + desc.size.width;
248 uint8_t *src2 = src1 + desc.size.width; 263 uint8_t *src2 = src1 + desc.size.width;
249 int count;
250 uint32_t color;
251 uint8_t *dst = (g_Context.cell_out + y * desc.size.width) + 1; 264 uint8_t *dst = (g_Context.cell_out + y * desc.size.width) + 1;
252 uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride); 265 uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride);
266 const u8x16_t kOne = broadcast(1);
267 const u8x16_t kFour = broadcast(4);
268 const u8x16_t kEight = broadcast(8);
269 const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
253 270
254 for (x = 1; x < (desc.size.width - 1); ++x) { 271 /* Prime the src */
255 /* Build sum, weight center by 9x. */ 272 u8x16_t src00 = *(u8x16_t*)&src0[0];
256 count = src0[-1] + src0[0] + src0[1] + 273 u8x16_t src01 = *(u8x16_t*)&src0[16];
257 src1[-1] + src1[0] * 9 + src1[1] + 274 u8x16_t src10 = *(u8x16_t*)&src1[0];
258 src2[-1] + src2[0] + src2[1]; 275 u8x16_t src11 = *(u8x16_t*)&src1[16];
259 color = kNeighborColors[count]; 276 u8x16_t src20 = *(u8x16_t*)&src2[0];
277 u8x16_t src21 = *(u8x16_t*)&src2[16];
260 278
279 /* This inner loop is SIMD - each loop iteration will process 16 cells. */
280 for (x = 1; (x + 15) < (desc.size.width - 1); x += 16) {
281
282 /*
283 * Construct jittered source temps, using __builtin_shufflevector(..) to
284 * extract a shifted 16 element vector from the 32 element concatination
binji 2014/05/15 18:58:37 sp: concatenation
nfullagar 2014/05/15 21:25:13 Done.
285 * of two source vectors.
286 */
287 u8x16_t src0j0 = src00;
288 u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
289 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
290 u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
291 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
292 u8x16_t src1j0 = src10;
293 u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
294 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
295 u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
296 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
297 u8x16_t src2j0 = src20;
298 u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
299 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
300 u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
301 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
302
303 /* Sum the jittered sources to construct neighbor count. */
304 u8x16_t count = src0j0 + src0j1 + src0j2 +
305 src1j0 + + src1j2 +
306 src2j0 + src2j1 + src2j2;
307 /* Add the center cell. */
308 count = count + count + src1j1;
309 /* If count > 4 and < 8, center cell will be alive in the next frame. */
310 u8x16_t alive1 = count > kFour;
311 u8x16_t alive2 = count < kEight;
312 /* Intersect the two comparisons from above. */
313 u8x16_t alive = alive1 & alive2;
314
315 /*
316 * At this point, alive[x] will be one of two values:
317 * 0x00 for a dead cell
318 * 0xFF for an alive cell.
319 *
320 * Next, convert alive cells to green pixel color.
321 * Use __builtin_shufflevector(..) to construct output pixels from
322 * concatenation of alive vector and kZero255 const vector.
323 * Indices 0..15 select the 16 cells from alive vector.
324 * Index 16 is zero constant from kZero255 constant vector.
325 * Index 17 is 255 constant from kZero255 constant vector.
326 * Output pixel color values are in BGRABGRABGRABGRA order.
327 * Since each pixel needs 4 bytes of color information, 16 cells will
328 * need to expand to 4 separate 16 byte pixel splats.
329 */
330 u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
331 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
332 u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
333 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
334 u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
335 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
336 u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
337 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
338
339 /* Write 16 pixels to output pixel buffer. */
340 *(u8x16_t*)(pixel_line + 0) = pixel0_3;
341 *(u8x16_t*)(pixel_line + 4) = pixel4_7;
342 *(u8x16_t*)(pixel_line + 8) = pixel8_11;
343 *(u8x16_t*)(pixel_line + 12) = pixel12_15;
344
345 /* Convert alive mask to 1 or 0 and store in destination cell array. */
346 *(u8x16_t*)dst = alive & kOne;
347
348 /* Increment pointers. */
349 pixel_line += 16;
350 dst += 16;
351 src0 += 16;
352 src1 += 16;
353 src2 += 16;
354
355 /* Shift source over by 16 cells and read the next 16 cells. */
356 src00 = src01;
357 src01 = *(u8x16_t*)&src0[16];
358 src10 = src11;
359 src11 = *(u8x16_t*)&src1[16];
360 src20 = src21;
361 src21 = *(u8x16_t*)&src2[16];
362 }
363
364 /*
365 * The SIMD loop above does 16 cells at a time. The loop below is the
366 * regular version which processes one cell at a time. It is used to
367 * finish the remainder of the scanline not handled by the SIMD loop.
368 */
369 for (; x < (desc.size.width - 1); ++x) {
370 /* Sum the jittered sources to construct neighbor count. */
371 int count = src0[0] + src0[1] + src0[2] +
372 src1[0] + + src1[2] +
373 src2[0] + src2[1] + src2[2];
374 /* Add the center cell. */
375 count = count + count + src1[1];
376 /* Use table lookup indexed by count to determine pixel & alive state. */
377 uint32_t color = kNeighborColors[count];
261 *pixel_line++ = color; 378 *pixel_line++ = color;
262 *dst++ = kIsAlive[count]; 379 *dst++ = kIsAlive[count];
263 ++src0; 380 ++src0;
264 ++src1; 381 ++src1;
265 ++src2; 382 ++src2;
266 } 383 }
267 } 384 }
268 385
269 cell_temp = g_Context.cell_in; 386 cell_temp = g_Context.cell_in;
270 g_Context.cell_in = g_Context.cell_out; 387 g_Context.cell_in = g_Context.cell_out;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
318 } 435 }
319 } 436 }
320 return 0; 437 return 0;
321 } 438 }
322 439
323 /* 440 /*
324 * Register the function to call once the Instance Object is initialized. 441 * Register the function to call once the Instance Object is initialized.
325 * see: ppapi_simple/ps_main.h 442 * see: ppapi_simple/ps_main.h
326 */ 443 */
327 PPAPI_SIMPLE_REGISTER_MAIN(example_main); 444 PPAPI_SIMPLE_REGISTER_MAIN(example_main);
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698