Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 /* Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 * Use of this source code is governed by a BSD-style license that can be | 2 * Use of this source code is governed by a BSD-style license that can be |
| 3 * found in the LICENSE file. | 3 * found in the LICENSE file. |
| 4 */ | 4 */ |
| 5 | 5 |
| 6 #include <assert.h> | 6 #include <assert.h> |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 #include <stdio.h> | 8 #include <stdio.h> |
| 9 #include <stdlib.h> | 9 #include <stdlib.h> |
| 10 #include <string.h> | 10 #include <string.h> |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 36 PP_Resource ctx; | 36 PP_Resource ctx; |
| 37 struct PP_Size size; | 37 struct PP_Size size; |
| 38 int bound; | 38 int bound; |
| 39 uint8_t* cell_in; | 39 uint8_t* cell_in; |
| 40 uint8_t* cell_out; | 40 uint8_t* cell_out; |
| 41 } g_Context; | 41 } g_Context; |
| 42 | 42 |
| 43 | 43 |
| 44 const unsigned int kInitialRandSeed = 0xC0DE533D; | 44 const unsigned int kInitialRandSeed = 0xC0DE533D; |
| 45 | 45 |
| 46 #define INLINE inline __attribute__((always_inline)) | |
| 47 | |
| 46 /* BGRA helper macro, for constructing a pixel for a BGRA buffer. */ | 48 /* BGRA helper macro, for constructing a pixel for a BGRA buffer. */ |
| 47 #define MakeBGRA(b, g, r, a) \ | 49 #define MakeBGRA(b, g, r, a) \ |
| 48 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) | 50 (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) |
| 49 | 51 |
| 52 /* 128 bit vector types */ | |
| 53 typedef uint8_t u8x16_t __attribute__ ((vector_size (16))); | |
| 54 | |
| 55 /* Helper function to broadcast x across 16 element vector. */ | |
| 56 INLINE u8x16_t broadcast(uint8_t x) { | |
| 57 u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x}; | |
| 58 return r; | |
| 59 } | |
| 60 | |
| 50 | 61 |
| 51 /* | 62 /* |
| 52 * Given a count of cells in a 3x3 grid where cells are worth 1 except for | 63 * Convert a count value into a live (green) or dead color value. |
| 53 * the center which is worth 9, this is a color representation of how | |
| 54 * "alive" that cell is making for a more interesting representation than | |
| 55 * a binary alive or dead. | |
| 56 */ | 64 */ |
| 57 const uint32_t kNeighborColors[] = { | 65 const uint32_t kNeighborColors[] = { |
| 58 MakeBGRA(0x00, 0x00, 0x00, 0xff), | 66 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 59 MakeBGRA(0x00, 0x40, 0x00, 0xff), | 67 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 60 MakeBGRA(0x00, 0x60, 0x00, 0xff), | 68 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 61 MakeBGRA(0x00, 0x80, 0x00, 0xff), | 69 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 62 MakeBGRA(0x00, 0xA0, 0x00, 0xff), | 70 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 63 MakeBGRA(0x00, 0xC0, 0x00, 0xff), | 71 MakeBGRA(0x00, 0xFF, 0x00, 0xFF), |
| 64 MakeBGRA(0x00, 0xE0, 0x00, 0xff), | 72 MakeBGRA(0x00, 0xFF, 0x00, 0xFF), |
| 65 MakeBGRA(0x00, 0x00, 0x00, 0xff), | 73 MakeBGRA(0x00, 0xFF, 0x00, 0xFF), |
| 66 MakeBGRA(0x00, 0x40, 0x00, 0xff), | 74 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 67 MakeBGRA(0x00, 0x60, 0x00, 0xff), | 75 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 68 MakeBGRA(0x00, 0x80, 0x00, 0xff), | 76 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 69 MakeBGRA(0x00, 0xA0, 0x00, 0xff), | 77 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 70 MakeBGRA(0x00, 0xC0, 0x00, 0xff), | 78 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 71 MakeBGRA(0x00, 0xE0, 0x00, 0xff), | 79 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 72 MakeBGRA(0x00, 0xFF, 0x00, 0xff), | 80 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 73 MakeBGRA(0x00, 0xFF, 0x00, 0xff), | 81 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 74 MakeBGRA(0x00, 0xFF, 0x00, 0xff), | 82 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 75 MakeBGRA(0x00, 0xFF, 0x00, 0xff), | 83 MakeBGRA(0x00, 0x00, 0x00, 0xFF), |
| 76 }; | 84 }; |
| 77 | 85 |
| 78 /* | 86 /* |
| 79 * These represent the new health value of a cell based on its neighboring | 87 * These represent the new health value of a cell based on its neighboring |
| 80 * values. The health is binary: either alive or dead. | 88 * values. The health is binary: either alive or dead. |
| 81 */ | 89 */ |
| 82 const uint8_t kIsAlive[] = { | 90 const uint8_t kIsAlive[] = { |
| 83 0, 0, 0, 1, 0, 0, 0, 0, 0, /* Values if the center cell is dead. */ | 91 0, 0, 0, 0, 0, 1, 1, 1, 0, |
| 84 0, 0, 1, 1, 0, 0, 0, 0, 0 /* Values if the center cell is alive. */ | 92 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| 85 }; | 93 }; |
| 86 | 94 |
| 87 void UpdateContext(uint32_t width, uint32_t height) { | 95 void UpdateContext(uint32_t width, uint32_t height) { |
| 88 if (width != g_Context.size.width || height != g_Context.size.height) { | 96 if (width != g_Context.size.width || height != g_Context.size.height) { |
| 89 size_t size = width * height; | 97 size_t size = width * height; |
| 90 size_t index; | 98 size_t index; |
| 91 | 99 |
| 92 free(g_Context.cell_in); | 100 free(g_Context.cell_in); |
| 93 free(g_Context.cell_out); | 101 free(g_Context.cell_out); |
| 94 | 102 |
| (...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 209 for (i = 0; i < width; ++i) { | 217 for (i = 0; i < width; ++i) { |
| 210 g_Context.cell_in[i] = rand() & 1; | 218 g_Context.cell_in[i] = rand() & 1; |
| 211 g_Context.cell_in[i + (height - 1) * width] = rand() & 1; | 219 g_Context.cell_in[i + (height - 1) * width] = rand() & 1; |
| 212 } | 220 } |
| 213 for (i = 0; i < height; ++i) { | 221 for (i = 0; i < height; ++i) { |
| 214 g_Context.cell_in[i * width] = rand() & 1; | 222 g_Context.cell_in[i * width] = rand() & 1; |
| 215 g_Context.cell_in[i * width + (width - 1)] = rand() & 1; | 223 g_Context.cell_in[i * width + (width - 1)] = rand() & 1; |
| 216 } | 224 } |
| 217 } | 225 } |
| 218 | 226 |
| 219 void Render() { | 227 #define NOINLINE __attribute__((noinline)) |
|
binji
2014/05/15 18:58:37
needed?
nfullagar
2014/05/15 21:25:13
Done.
| |
| 228 | |
| 229 NOINLINE void Render() { | |
| 220 struct PP_Size* psize = &g_Context.size; | 230 struct PP_Size* psize = &g_Context.size; |
| 221 PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL; | 231 PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL; |
| 222 | 232 |
| 223 /* | 233 /* |
| 224 * Create a buffer to draw into. Since we are waiting until the next flush, | 234 * Create a buffer to draw into. Since we are waiting until the next flush, |
| 225 * chrome has an opportunity to cache this buffer; see ppb_graphics_2d.h. | 235 * chrome has an opportunity to cache this buffer; see ppb_graphics_2d.h. |
| 226 */ | 236 */ |
| 227 PP_Resource image = | 237 PP_Resource image = |
| 228 g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE); | 238 g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE); |
| 229 uint8_t* pixels = g_pImageData->Map(image); | 239 uint8_t* pixels = g_pImageData->Map(image); |
| 230 | 240 |
| 231 struct PP_ImageDataDesc desc; | 241 struct PP_ImageDataDesc desc; |
| 232 uint8_t* cell_temp; | 242 uint8_t* cell_temp; |
| 233 uint32_t x, y; | 243 uint32_t x, y; |
| 234 | 244 |
| 235 /* If we somehow have not allocated these pointers yet, skip this frame. */ | 245 /* If we somehow have not allocated these pointers yet, skip this frame. */ |
| 236 if (!g_Context.cell_in || !g_Context.cell_out) return; | 246 if (!g_Context.cell_in || !g_Context.cell_out) return; |
| 237 | 247 |
| 238 /* Get the stride. */ | 248 /* Get the stride. */ |
| 239 g_pImageData->Describe(image, &desc); | 249 g_pImageData->Describe(image, &desc); |
| 240 | 250 |
| 241 /* Stir up the edges to prevent the simulation from reaching steady state. */ | 251 /* Stir up the edges to prevent the simulation from reaching steady state. */ |
| 242 Stir(desc.size.width, desc.size.height); | 252 Stir(desc.size.width, desc.size.height); |
| 243 | 253 |
| 244 /* Do neighbor summation; apply rules, output pixel color. */ | 254 /* |
| 255 * Do neighbor summation; apply rules, output pixel color. Note that a 1 cell | |
| 256 * wide perimeter is excluded from the simulation update; only cells from | |
| 257 * x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated. | |
| 258 */ | |
| 259 | |
| 245 for (y = 1; y < desc.size.height - 1; ++y) { | 260 for (y = 1; y < desc.size.height - 1; ++y) { |
| 246 uint8_t *src0 = (g_Context.cell_in + (y - 1) * desc.size.width) + 1; | 261 uint8_t *src0 = (g_Context.cell_in + (y - 1) * desc.size.width); |
| 247 uint8_t *src1 = src0 + desc.size.width; | 262 uint8_t *src1 = src0 + desc.size.width; |
| 248 uint8_t *src2 = src1 + desc.size.width; | 263 uint8_t *src2 = src1 + desc.size.width; |
| 249 int count; | |
| 250 uint32_t color; | |
| 251 uint8_t *dst = (g_Context.cell_out + y * desc.size.width) + 1; | 264 uint8_t *dst = (g_Context.cell_out + y * desc.size.width) + 1; |
| 252 uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride); | 265 uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride); |
| 266 const u8x16_t kOne = broadcast(1); | |
| 267 const u8x16_t kFour = broadcast(4); | |
| 268 const u8x16_t kEight = broadcast(8); | |
| 269 const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
| 253 | 270 |
| 254 for (x = 1; x < (desc.size.width - 1); ++x) { | 271 /* Prime the src */ |
| 255 /* Build sum, weight center by 9x. */ | 272 u8x16_t src00 = *(u8x16_t*)&src0[0]; |
| 256 count = src0[-1] + src0[0] + src0[1] + | 273 u8x16_t src01 = *(u8x16_t*)&src0[16]; |
| 257 src1[-1] + src1[0] * 9 + src1[1] + | 274 u8x16_t src10 = *(u8x16_t*)&src1[0]; |
| 258 src2[-1] + src2[0] + src2[1]; | 275 u8x16_t src11 = *(u8x16_t*)&src1[16]; |
| 259 color = kNeighborColors[count]; | 276 u8x16_t src20 = *(u8x16_t*)&src2[0]; |
| 277 u8x16_t src21 = *(u8x16_t*)&src2[16]; | |
| 260 | 278 |
| 279 /* This inner loop is SIMD - each loop iteration will process 16 cells. */ | |
| 280 for (x = 1; (x + 15) < (desc.size.width - 1); x += 16) { | |
| 281 | |
| 282 /* | |
| 283 * Construct jittered source temps, using __builtin_shufflevector(..) to | |
| 284 * extract a shifted 16 element vector from the 32 element concatination | |
|
binji
2014/05/15 18:58:37
sp: concatenation
nfullagar
2014/05/15 21:25:13
Done.
| |
| 285 * of two source vectors. | |
| 286 */ | |
| 287 u8x16_t src0j0 = src00; | |
| 288 u8x16_t src0j1 = __builtin_shufflevector(src00, src01, | |
| 289 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); | |
| 290 u8x16_t src0j2 = __builtin_shufflevector(src00, src01, | |
| 291 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); | |
| 292 u8x16_t src1j0 = src10; | |
| 293 u8x16_t src1j1 = __builtin_shufflevector(src10, src11, | |
| 294 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); | |
| 295 u8x16_t src1j2 = __builtin_shufflevector(src10, src11, | |
| 296 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); | |
| 297 u8x16_t src2j0 = src20; | |
| 298 u8x16_t src2j1 = __builtin_shufflevector(src20, src21, | |
| 299 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); | |
| 300 u8x16_t src2j2 = __builtin_shufflevector(src20, src21, | |
| 301 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); | |
| 302 | |
| 303 /* Sum the jittered sources to construct neighbor count. */ | |
| 304 u8x16_t count = src0j0 + src0j1 + src0j2 + | |
| 305 src1j0 + + src1j2 + | |
| 306 src2j0 + src2j1 + src2j2; | |
| 307 /* Add the center cell. */ | |
| 308 count = count + count + src1j1; | |
| 309 /* If count > 4 and < 8, center cell will be alive in the next frame. */ | |
| 310 u8x16_t alive1 = count > kFour; | |
| 311 u8x16_t alive2 = count < kEight; | |
| 312 /* Intersect the two comparisons from above. */ | |
| 313 u8x16_t alive = alive1 & alive2; | |
| 314 | |
| 315 /* | |
| 316 * At this point, alive[x] will be one of two values: | |
| 317 * 0x00 for a dead cell | |
| 318 * 0xFF for an alive cell. | |
| 319 * | |
| 320 * Next, convert alive cells to green pixel color. | |
| 321 * Use __builtin_shufflevector(..) to construct output pixels from | |
| 322 * concatenation of alive vector and kZero255 const vector. | |
| 323 * Indices 0..15 select the 16 cells from alive vector. | |
| 324 * Index 16 is zero constant from kZero255 constant vector. | |
| 325 * Index 17 is 255 constant from kZero255 constant vector. | |
| 326 * Output pixel color values are in BGRABGRABGRABGRA order. | |
| 327 * Since each pixel needs 4 bytes of color information, 16 cells will | |
| 328 * need to expand to 4 separate 16 byte pixel splats. | |
| 329 */ | |
| 330 u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255, | |
| 331 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17); | |
| 332 u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255, | |
| 333 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17); | |
| 334 u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255, | |
| 335 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17); | |
| 336 u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255, | |
| 337 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17); | |
| 338 | |
| 339 /* Write 16 pixels to output pixel buffer. */ | |
| 340 *(u8x16_t*)(pixel_line + 0) = pixel0_3; | |
| 341 *(u8x16_t*)(pixel_line + 4) = pixel4_7; | |
| 342 *(u8x16_t*)(pixel_line + 8) = pixel8_11; | |
| 343 *(u8x16_t*)(pixel_line + 12) = pixel12_15; | |
| 344 | |
| 345 /* Convert alive mask to 1 or 0 and store in destination cell array. */ | |
| 346 *(u8x16_t*)dst = alive & kOne; | |
| 347 | |
| 348 /* Increment pointers. */ | |
| 349 pixel_line += 16; | |
| 350 dst += 16; | |
| 351 src0 += 16; | |
| 352 src1 += 16; | |
| 353 src2 += 16; | |
| 354 | |
| 355 /* Shift source over by 16 cells and read the next 16 cells. */ | |
| 356 src00 = src01; | |
| 357 src01 = *(u8x16_t*)&src0[16]; | |
| 358 src10 = src11; | |
| 359 src11 = *(u8x16_t*)&src1[16]; | |
| 360 src20 = src21; | |
| 361 src21 = *(u8x16_t*)&src2[16]; | |
| 362 } | |
| 363 | |
| 364 /* | |
| 365 * The SIMD loop above does 16 cells at a time. The loop below is the | |
| 366 * regular version which processes one cell at a time. It is used to | |
| 367 * finish the remainder of the scanline not handled by the SIMD loop. | |
| 368 */ | |
| 369 for (; x < (desc.size.width - 1); ++x) { | |
| 370 /* Sum the jittered sources to construct neighbor count. */ | |
| 371 int count = src0[0] + src0[1] + src0[2] + | |
| 372 src1[0] + + src1[2] + | |
| 373 src2[0] + src2[1] + src2[2]; | |
| 374 /* Add the center cell. */ | |
| 375 count = count + count + src1[1]; | |
| 376 /* Use table lookup indexed by count to determine pixel & alive state. */ | |
| 377 uint32_t color = kNeighborColors[count]; | |
| 261 *pixel_line++ = color; | 378 *pixel_line++ = color; |
| 262 *dst++ = kIsAlive[count]; | 379 *dst++ = kIsAlive[count]; |
| 263 ++src0; | 380 ++src0; |
| 264 ++src1; | 381 ++src1; |
| 265 ++src2; | 382 ++src2; |
| 266 } | 383 } |
| 267 } | 384 } |
| 268 | 385 |
| 269 cell_temp = g_Context.cell_in; | 386 cell_temp = g_Context.cell_in; |
| 270 g_Context.cell_in = g_Context.cell_out; | 387 g_Context.cell_in = g_Context.cell_out; |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 318 } | 435 } |
| 319 } | 436 } |
| 320 return 0; | 437 return 0; |
| 321 } | 438 } |
| 322 | 439 |
| 323 /* | 440 /* |
| 324 * Register the function to call once the Instance Object is initialized. | 441 * Register the function to call once the Instance Object is initialized. |
| 325 * see: ppapi_simple/ps_main.h | 442 * see: ppapi_simple/ps_main.h |
| 326 */ | 443 */ |
| 327 PPAPI_SIMPLE_REGISTER_MAIN(example_main); | 444 PPAPI_SIMPLE_REGISTER_MAIN(example_main); |
| OLD | NEW |