Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Unified Diff: native_client_sdk/src/examples/demo/life_simd/life.c

Issue 289023002: Initial SIMD demos life and earth for PNaCl. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: native_client_sdk/src/examples/demo/life_simd/life.c
diff --git a/native_client_sdk/src/examples/demo/life/life.c b/native_client_sdk/src/examples/demo/life_simd/life.c
similarity index 57%
copy from native_client_sdk/src/examples/demo/life/life.c
copy to native_client_sdk/src/examples/demo/life_simd/life.c
index d9d9def3fbccf998fb2623cdab182860b2970cc3..c66fab1a650c9c4df36e53982f0ee62159491368 100644
--- a/native_client_sdk/src/examples/demo/life/life.c
+++ b/native_client_sdk/src/examples/demo/life_simd/life.c
@@ -43,36 +43,44 @@ struct {
const unsigned int kInitialRandSeed = 0xC0DE533D;
+#define INLINE inline __attribute__((always_inline))
+
/* BGRA helper macro, for constructing a pixel for a BGRA buffer. */
#define MakeBGRA(b, g, r, a) \
(((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+/* 128 bit vector types */
+typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
+
+/* Helper function to broadcast x across a 16-element vector. */
+INLINE u8x16_t broadcast(uint8_t x) {
+ u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
+ return r;
+}
+
/*
- * Given a count of cells in a 3x3 grid where cells are worth 1 except for
- * the center which is worth 9, this is a color representation of how
- * "alive" that cell is making for a more interesting representation than
- * a binary alive or dead.
+ * Convert a count value into a live (green) or dead color value.
*/
const uint32_t kNeighborColors[] = {
- MakeBGRA(0x00, 0x00, 0x00, 0xff),
- MakeBGRA(0x00, 0x40, 0x00, 0xff),
- MakeBGRA(0x00, 0x60, 0x00, 0xff),
- MakeBGRA(0x00, 0x80, 0x00, 0xff),
- MakeBGRA(0x00, 0xA0, 0x00, 0xff),
- MakeBGRA(0x00, 0xC0, 0x00, 0xff),
- MakeBGRA(0x00, 0xE0, 0x00, 0xff),
- MakeBGRA(0x00, 0x00, 0x00, 0xff),
- MakeBGRA(0x00, 0x40, 0x00, 0xff),
- MakeBGRA(0x00, 0x60, 0x00, 0xff),
- MakeBGRA(0x00, 0x80, 0x00, 0xff),
- MakeBGRA(0x00, 0xA0, 0x00, 0xff),
- MakeBGRA(0x00, 0xC0, 0x00, 0xff),
- MakeBGRA(0x00, 0xE0, 0x00, 0xff),
- MakeBGRA(0x00, 0xFF, 0x00, 0xff),
- MakeBGRA(0x00, 0xFF, 0x00, 0xff),
- MakeBGRA(0x00, 0xFF, 0x00, 0xff),
- MakeBGRA(0x00, 0xFF, 0x00, 0xff),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
+ MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
+ MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
};
/*
@@ -80,8 +88,8 @@ const uint32_t kNeighborColors[] = {
* values. The health is binary: either alive or dead.
*/
const uint8_t kIsAlive[] = {
- 0, 0, 0, 1, 0, 0, 0, 0, 0, /* Values if the center cell is dead. */
- 0, 0, 1, 1, 0, 0, 0, 0, 0 /* Values if the center cell is alive. */
+ 0, 0, 0, 0, 0, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0
};
void UpdateContext(uint32_t width, uint32_t height) {
@@ -216,7 +224,9 @@ void Stir(uint32_t width, uint32_t height) {
}
}
-void Render() {
+#define NOINLINE __attribute__((noinline))
binji 2014/05/15 18:58:37 needed?
nfullagar 2014/05/15 21:25:13 Done.
+
+NOINLINE void Render() {
struct PP_Size* psize = &g_Context.size;
PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL;
@@ -241,23 +251,130 @@ void Render() {
/* Stir up the edges to prevent the simulation from reaching steady state. */
Stir(desc.size.width, desc.size.height);
- /* Do neighbor summation; apply rules, output pixel color. */
+ /*
+ * Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
+ * wide perimeter is excluded from the simulation update; only cells from
+ * x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
+ */
+
for (y = 1; y < desc.size.height - 1; ++y) {
- uint8_t *src0 = (g_Context.cell_in + (y - 1) * desc.size.width) + 1;
+ uint8_t *src0 = (g_Context.cell_in + (y - 1) * desc.size.width);
uint8_t *src1 = src0 + desc.size.width;
uint8_t *src2 = src1 + desc.size.width;
- int count;
- uint32_t color;
uint8_t *dst = (g_Context.cell_out + y * desc.size.width) + 1;
uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride);
+ const u8x16_t kOne = broadcast(1);
+ const u8x16_t kFour = broadcast(4);
+ const u8x16_t kEight = broadcast(8);
+ const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ /* Prime the src */
+ u8x16_t src00 = *(u8x16_t*)&src0[0];
+ u8x16_t src01 = *(u8x16_t*)&src0[16];
+ u8x16_t src10 = *(u8x16_t*)&src1[0];
+ u8x16_t src11 = *(u8x16_t*)&src1[16];
+ u8x16_t src20 = *(u8x16_t*)&src2[0];
+ u8x16_t src21 = *(u8x16_t*)&src2[16];
+
+ /* This inner loop is SIMD - each loop iteration will process 16 cells. */
+ for (x = 1; (x + 15) < (desc.size.width - 1); x += 16) {
+
+ /*
+ * Construct jittered source temps, using __builtin_shufflevector(..) to
+ * extract a shifted 16 element vector from the 32 element concatenation
binji 2014/05/15 18:58:37 sp: concatenation
nfullagar 2014/05/15 21:25:13 Done.
+ * of two source vectors.
+ */
+ u8x16_t src0j0 = src00;
+ u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ u8x16_t src1j0 = src10;
+ u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ u8x16_t src2j0 = src20;
+ u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+
+ /* Sum the jittered sources to construct neighbor count. */
+ u8x16_t count = src0j0 + src0j1 + src0j2 +
+ src1j0 + + src1j2 +
+ src2j0 + src2j1 + src2j2;
+ /* Add the center cell. */
+ count = count + count + src1j1;
+ /* If count > 4 and < 8, center cell will be alive in the next frame. */
+ u8x16_t alive1 = count > kFour;
+ u8x16_t alive2 = count < kEight;
+ /* Intersect the two comparisons from above. */
+ u8x16_t alive = alive1 & alive2;
+
+ /*
+ * At this point, alive[x] will be one of two values:
+ * 0x00 for a dead cell
+ * 0xFF for an alive cell.
+ *
+ * Next, convert alive cells to green pixel color.
+ * Use __builtin_shufflevector(..) to construct output pixels from
+ * concatenation of alive vector and kZero255 const vector.
+ * Indices 0..15 select the 16 cells from alive vector.
+ * Index 16 is zero constant from kZero255 constant vector.
+ * Index 17 is 255 constant from kZero255 constant vector.
+ * Output pixel color values are in BGRABGRABGRABGRA order.
+ * Since each pixel needs 4 bytes of color information, 16 cells will
+ * need to expand to 4 separate 16 byte pixel splats.
+ */
+ u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
+ 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
+ u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
+ 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
+ u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
+ 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
+ u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
+ 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
+
+ /* Write 16 pixels to output pixel buffer. */
+ *(u8x16_t*)(pixel_line + 0) = pixel0_3;
+ *(u8x16_t*)(pixel_line + 4) = pixel4_7;
+ *(u8x16_t*)(pixel_line + 8) = pixel8_11;
+ *(u8x16_t*)(pixel_line + 12) = pixel12_15;
+
+ /* Convert alive mask to 1 or 0 and store in destination cell array. */
+ *(u8x16_t*)dst = alive & kOne;
+
+ /* Increment pointers. */
+ pixel_line += 16;
+ dst += 16;
+ src0 += 16;
+ src1 += 16;
+ src2 += 16;
+
+ /* Shift source over by 16 cells and read the next 16 cells. */
+ src00 = src01;
+ src01 = *(u8x16_t*)&src0[16];
+ src10 = src11;
+ src11 = *(u8x16_t*)&src1[16];
+ src20 = src21;
+ src21 = *(u8x16_t*)&src2[16];
+ }
- for (x = 1; x < (desc.size.width - 1); ++x) {
- /* Build sum, weight center by 9x. */
- count = src0[-1] + src0[0] + src0[1] +
- src1[-1] + src1[0] * 9 + src1[1] +
- src2[-1] + src2[0] + src2[1];
- color = kNeighborColors[count];
-
+ /*
+ * The SIMD loop above does 16 cells at a time. The loop below is the
+ * regular version which processes one cell at a time. It is used to
+ * finish the remainder of the scanline not handled by the SIMD loop.
+ */
+ for (; x < (desc.size.width - 1); ++x) {
+ /* Sum the jittered sources to construct neighbor count. */
+ int count = src0[0] + src0[1] + src0[2] +
+ src1[0] + + src1[2] +
+ src2[0] + src2[1] + src2[2];
+ /* Add the center cell. */
+ count = count + count + src1[1];
+ /* Use table lookup indexed by count to determine pixel & alive state. */
+ uint32_t color = kNeighborColors[count];
*pixel_line++ = color;
*dst++ = kIsAlive[count];
++src0;

Powered by Google App Engine
This is Rietveld 408576698