Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: native_client_sdk/src/examples/demo/earth_simd/earth.cc

Issue 289023002: Initial SIMD demos life and earth for PNaCl. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <assert.h> 5 #include <assert.h>
6 #include <math.h> 6 #include <math.h>
7 #include <ppapi/c/ppb_input_event.h> 7 #include <ppapi/c/ppb_input_event.h>
8 #include <ppapi/cpp/input_event.h> 8 #include <ppapi/cpp/input_event.h>
9 #include <ppapi/cpp/var.h> 9 #include <ppapi/cpp/var.h>
10 #include <ppapi/cpp/var_array.h> 10 #include <ppapi/cpp/var_array.h>
(...skipping 12 matching lines...) Expand all
23 #include "ppapi_simple/ps.h" 23 #include "ppapi_simple/ps.h"
24 #include "ppapi_simple/ps_context_2d.h" 24 #include "ppapi_simple/ps_context_2d.h"
25 #include "ppapi_simple/ps_event.h" 25 #include "ppapi_simple/ps_event.h"
26 #include "ppapi_simple/ps_interface.h" 26 #include "ppapi_simple/ps_interface.h"
27 #include "ppapi_simple/ps_main.h" 27 #include "ppapi_simple/ps_main.h"
28 #include "sdk_util/macros.h" 28 #include "sdk_util/macros.h"
29 #include "sdk_util/thread_pool.h" 29 #include "sdk_util/thread_pool.h"
30 30
31 using namespace sdk_util; // For sdk_util::ThreadPool 31 using namespace sdk_util; // For sdk_util::ThreadPool
32 32
33 #define INLINE inline __attribute__((always_inline))
34
35 // 128 bit SIMD vector types
36 typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
37 typedef int32_t i32x4_t __attribute__ ((vector_size (16)));
38 typedef uint32_t u32x4_t __attribute__ ((vector_size (16)));
39 typedef float f32x4_t __attribute__ ((vector_size (16)));
40
33 // Global properties used to setup Earth demo. 41 // Global properties used to setup Earth demo.
34 namespace { 42 namespace {
35 const float kPI = M_PI; 43 const float kPI = M_PI;
36 const float kTwoPI = kPI * 2.0f; 44 const float kTwoPI = kPI * 2.0f;
37 const float kOneOverPI = 1.0f / kPI; 45 const float kOneOverPI = 1.0f / kPI;
38 const float kOneOver2PI = 1.0f / kTwoPI; 46 const float kOneOver2PI = 1.0f / kTwoPI;
39 const float kOneOver255 = 1.0f / 255.0f;
40 const int kArcCosineTableSize = 4096; 47 const int kArcCosineTableSize = 4096;
41 const int kFramesToBenchmark = 100; 48 const int kFramesToBenchmark = 100;
42 const float kZoomMin = 1.0f; 49 const float kZoomMin = 1.0f;
43 const float kZoomMax = 50.0f; 50 const float kZoomMax = 50.0f;
44 const float kWheelSpeed = 2.0f; 51 const float kWheelSpeed = 2.0f;
45 const float kLightMin = 0.0f; 52 const float kLightMin = 0.0f;
46 const float kLightMax = 2.0f; 53 const float kLightMax = 2.0f;
47 54
48 // Timer helper for benchmarking. Returns seconds elapsed since program start, 55 // Timer helper for benchmarking. Returns seconds elapsed since program start,
49 // as a double. 56 // as a double.
50 timeval start_tv; 57 timeval start_tv;
51 int start_tv_retv = gettimeofday(&start_tv, NULL); 58 int start_tv_retv = gettimeofday(&start_tv, NULL);
52 59
53 inline double getseconds() { 60 inline double getseconds() {
54 const double usec_to_sec = 0.000001; 61 const double usec_to_sec = 0.000001;
55 timeval tv; 62 timeval tv;
56 if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL))) 63 if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL)))
57 return (tv.tv_sec - start_tv.tv_sec) + tv.tv_usec * usec_to_sec; 64 return (tv.tv_sec - start_tv.tv_sec) + tv.tv_usec * usec_to_sec;
58 return 0.0; 65 return 0.0;
59 } 66 }
60 67
61 // RGBA helper functions, used for extracting color from RGBA source image. 68 // SIMD Vector helper functions.
62 inline float ExtractR(uint32_t c) { 69 INLINE f32x4_t min(f32x4_t a, f32x4_t b) {
63 return static_cast<float>(c & 0xFF) * kOneOver255; 70 i32x4_t m = a < b;
71 return (f32x4_t)(((i32x4_t)a & m) | ((i32x4_t)b & ~m));
64 } 72 }
65 73
66 inline float ExtractG(uint32_t c) { 74 INLINE f32x4_t max(f32x4_t a, f32x4_t b) {
67 return static_cast<float>((c & 0xFF00) >> 8) * kOneOver255; 75 i32x4_t m = a > b;
76 return (f32x4_t)(((i32x4_t)a & m) | ((i32x4_t)b & ~m));
68 } 77 }
69 78
70 inline float ExtractB(uint32_t c) { 79 INLINE float dot3(f32x4_t a, f32x4_t b) {
71 return static_cast<float>((c & 0xFF0000) >> 16) * kOneOver255; 80 f32x4_t c = a * b;
81 return c[0] + c[1] + c[2];
82 }
83
84 INLINE f32x4_t broadcast(float x) {
85 f32x4_t r = {x, x, x, x};
86 return r;
87 }
88
89 // SIMD RGBA helper functions, used for extracting color from RGBA source image.
90 INLINE f32x4_t ExtractRGBA(uint32_t c) {
91 const f32x4_t kOneOver255 = broadcast(1.0f / 255.0f);
92 const i32x4_t kZero = {0, 0, 0, 0};
93 i32x4_t v = {c, c, c, c};
94 // zero extend packed color into 32x4 integer vector
95 v = (i32x4_t)__builtin_shufflevector((u8x16_t)v, (u8x16_t)kZero,
96 0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, 3, 16, 16, 16);
97 // convert color values to float, range 0..1
98 f32x4_t f = __builtin_convertvector(v, f32x4_t) * kOneOver255;
99 return f;
100 }
101
102 // SIMD BGRA helper function, for constructing a pixel for a BGRA buffer.
103 INLINE uint32_t PackBGRA(f32x4_t f) {
104 const f32x4_t kZero = broadcast(0.0f);
105 const f32x4_t kHalf = broadcast(0.5f);
106 const f32x4_t k255 = broadcast(255.0f);
107 f = max(f, kZero);
108 f = f * k255 + kHalf;
109 f = min(f, k255);
110 i32x4_t i = __builtin_convertvector(f, i32x4_t);
111 u32x4_t p = (u32x4_t)__builtin_shufflevector((u8x16_t)i, (u8x16_t)i,
112 8, 4, 0, 12, 8, 4, 0, 12, 8, 4, 0, 12, 8, 4, 0, 12);
113 return p[0];
72 } 114 }
73 115
74 // BGRA helper function, for constructing a pixel for a BGRA buffer. 116 // BGRA helper function, for constructing a pixel for a BGRA buffer.
75 inline uint32_t MakeBGRA(uint32_t b, uint32_t g, uint32_t r, uint32_t a) { 117 INLINE uint32_t MakeBGRA(uint32_t b, uint32_t g, uint32_t r, uint32_t a) {
76 return (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)); 118 return (((a) << 24) | ((r) << 16) | ((g) << 8) | (b));
77 } 119 }
78 120
79 // simple container for earth texture 121 // simple container for earth texture
80 struct Texture { 122 struct Texture {
81 int width, height; 123 int width, height;
82 uint32_t* pixels; 124 uint32_t* pixels;
83 Texture(int w, int h) : width(w), height(h) { 125 Texture(int w, int h) : width(w), height(h) {
84 pixels = new uint32_t[w * h]; 126 pixels = new uint32_t[w * h];
85 memset(pixels, 0, sizeof(uint32_t) * w * h); 127 memset(pixels, 0, sizeof(uint32_t) * w * h);
(...skipping 20 matching lines...) Expand all
106 // build a slightly larger table to allow for numeric imprecision 148 // build a slightly larger table to allow for numeric imprecision
107 for (int i = 0; i < (kArcCosineTableSize + 2); ++i) { 149 for (int i = 0; i < (kArcCosineTableSize + 2); ++i) {
108 float f = static_cast<float>(i) / kArcCosineTableSize; 150 float f = static_cast<float>(i) / kArcCosineTableSize;
109 f = f * 2.0f - 1.0f; 151 f = f * 2.0f - 1.0f;
110 table[i] = acos(f); 152 table[i] = acos(f);
111 } 153 }
112 } 154 }
113 155
114 // looks up acos(f) using a table and lerping between entries 156 // looks up acos(f) using a table and lerping between entries
115 // (it is expected that input f is between -1 and 1) 157 // (it is expected that input f is between -1 and 1)
116 float ArcCosine::TableLerp(float f) { 158 INLINE float ArcCosine::TableLerp(float f) {
117 float x = (f + 1.0f) * 0.5f; 159 float x = (f + 1.0f) * 0.5f;
118 x = x * kArcCosineTableSize; 160 x = x * kArcCosineTableSize;
119 int ix = static_cast<int>(x); 161 int ix = static_cast<int>(x);
120 float fx = static_cast<float>(ix); 162 float fx = static_cast<float>(ix);
121 float dx = x - fx; 163 float dx = x - fx;
122 float af = table[ix]; 164 float af = table[ix];
123 float af2 = table[ix + 1]; 165 float af2 = table[ix + 1];
124 return af + (af2 - af) * dx; 166 return af + (af2 - af) * dx;
125 } 167 }
126 168
127 // Helper functions for quick but approximate sqrt. 169 // Helper functions for quick but approximate sqrt.
128 union Convert { 170 union Convert {
129 float f; 171 float f;
130 int i; 172 int i;
131 Convert(int x) { i = x; } 173 Convert(int x) { i = x; }
132 Convert(float x) { f = x; } 174 Convert(float x) { f = x; }
133 int AsInt() { return i; } 175 int AsInt() { return i; }
134 float AsFloat() { return f; } 176 float AsFloat() { return f; }
135 }; 177 };
136 178
137 inline const int AsInteger(const float f) { 179 INLINE const int AsInteger(const float f) {
138 Convert u(f); 180 Convert u(f);
139 return u.AsInt(); 181 return u.AsInt();
140 } 182 }
141 183
142 inline const float AsFloat(const int i) { 184 INLINE const float AsFloat(const int i) {
143 Convert u(i); 185 Convert u(i);
144 return u.AsFloat(); 186 return u.AsFloat();
145 } 187 }
146 188
147 const long int kOneAsInteger = AsInteger(1.0f); 189 const long int kOneAsInteger = AsInteger(1.0f);
148 190
149 inline float inline_quick_sqrt(float x) { 191 INLINE float inline_quick_sqrt(float x) {
150 int i; 192 int i;
151 i = (AsInteger(x) >> 1) + (kOneAsInteger >> 1); 193 i = (AsInteger(x) >> 1) + (kOneAsInteger >> 1);
152 return AsFloat(i); 194 return AsFloat(i);
153 } 195 }
154 196
155 inline float inline_sqrt(float x) { 197 INLINE float inline_sqrt(float x) {
156 float y; 198 float y;
157 y = inline_quick_sqrt(x); 199 y = inline_quick_sqrt(x);
158 y = (y * y + x) / (2.0f * y); 200 y = (y * y + x) / (2.0f * y);
159 y = (y * y + x) / (2.0f * y); 201 y = (y * y + x) / (2.0f * y);
160 return y; 202 return y;
161 } 203 }
162 204
163 // takes a -0..1+ color, clamps it to 0..1 and maps it to 0..255 integer
164 inline uint32_t Clamp255(float x) {
165 if (x < 0.0f) {
166 x = 0.0f;
167 } else if (x > 1.0f) {
168 x = 1.0f;
169 }
170 return static_cast<uint32_t>(x * 255.0f);
171 }
172 } // namespace 205 } // namespace
173 206
174 207
175 // The main object that runs the Earth demo. 208 // The main object that runs the Earth demo.
176 class Planet { 209 class Planet {
177 public: 210 public:
178 Planet(); 211 Planet();
179 virtual ~Planet(); 212 virtual ~Planet();
180 // Runs a tick of the simulations, update 2D output. 213 // Runs a tick of the simulations, update 2D output.
181 void Update(); 214 void Update();
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after
369 *w = ps_context_->width; 402 *w = ps_context_->width;
370 *y = r; 403 *y = r;
371 *h = 1; 404 *h = 1;
372 } 405 }
373 406
374 407
375 inline uint32_t* Planet::wGetAddr(int x, int y) { 408 inline uint32_t* Planet::wGetAddr(int x, int y) {
376 return ps_context_->data + x + y * ps_context_->stride / sizeof(uint32_t); 409 return ps_context_->data + x + y * ps_context_->stride / sizeof(uint32_t);
377 } 410 }
378 411
379 // This is the meat of the ray tracer. Given a pixel span (x0, x1) on 412 // This is the inner loop of the ray tracer. Given a pixel span (x0, x1) on
380 // scanline y, shoot rays into the scene and render what they hit. Use 413 // scanline y, shoot rays into the scene and render what they hit. Use
381 // scanline coherence to do a few optimizations 414 // scanline coherence to do a few optimizations.
415 // This version uses portable SIMD 4 element single precision floating point
416 // vectors to perform many of the calculations, and builds only on PNaCl.
382 void Planet::wRenderPixelSpan(int x0, int x1, int y) { 417 void Planet::wRenderPixelSpan(int x0, int x1, int y) {
383 if (!base_tex_ || !night_tex_) 418 if (!base_tex_ || !night_tex_)
384 return; 419 return;
385 const int kColorBlack = MakeBGRA(0, 0, 0, 0xFF); 420 const uint32_t kColorBlack = MakeBGRA(0, 0, 0, 0xFF);
421 const uint32_t kSolidAlpha = MakeBGRA(0, 0, 0, 0xFF);
422 const f32x4_t kOne = {1.0f, 1.0f, 1.0f, 1.0f};
423 const f32x4_t diffuse = {diffuse_r_, diffuse_g_, diffuse_b_, 0.0f};
424 const f32x4_t ambient = {ambient_r_, ambient_g_, ambient_b_, 0.0f};
425 const f32x4_t light_pos = {light_x_, light_y_, light_z_, 1.0f};
426 const f32x4_t planet_pos = {planet_x_, planet_y_, planet_z_, 1.0f};
427 const f32x4_t planet_one_over_radius = broadcast(planet_one_over_radius_);
428 const f32x4_t planet_equator = {
429 planet_equator_x_, planet_equator_y_, planet_equator_z_, 0.0f};
430 const f32x4_t planet_pole = {
431 planet_pole_x_, planet_pole_y_, planet_pole_z_, 1.0f};
432 const f32x4_t planet_pole_x_equator = {
433 planet_pole_x_equator_x_, planet_pole_x_equator_y_,
434 planet_pole_x_equator_z_, 0.0f};
435
386 float width = ps_context_->width; 436 float width = ps_context_->width;
387 float height = ps_context_->height; 437 float height = ps_context_->height;
388 float min_dim = width < height ? width : height; 438 float min_dim = width < height ? width : height;
389 float offset_x = width < height ? 0 : (width - min_dim) * 0.5f; 439 float offset_x = width < height ? 0 : (width - min_dim) * 0.5f;
390 float offset_y = width < height ? (height - min_dim) * 0.5f : 0; 440 float offset_y = width < height ? (height - min_dim) * 0.5f : 0;
391 float y0 = eye_y_; 441 float y0 = eye_y_;
392 float z0 = eye_z_; 442 float z0 = eye_z_;
393 float y1 = (static_cast<float>(y - offset_y) / min_dim) * 2.0f - 1.0f; 443 float y1 = (static_cast<float>(y - offset_y) / min_dim) * 2.0f - 1.0f;
394 float z1 = 0.0f; 444 float z1 = 0.0f;
395 float dy = (y1 - y0); 445 float dy = (y1 - y0);
(...skipping 20 matching lines...) Expand all
416 // calculate discriminant 466 // calculate discriminant
417 float disc = b * b - 4.0f * a * c; 467 float disc = b * b - 4.0f * a * c;
418 468
419 // Did ray hit the sphere? 469 // Did ray hit the sphere?
420 if (disc < 0.0f) { 470 if (disc < 0.0f) {
421 *pixels = kColorBlack; 471 *pixels = kColorBlack;
422 ++pixels; 472 ++pixels;
423 continue; 473 continue;
424 } 474 }
425 475
426 // calc parametric t value 476 f32x4_t delta = {dx, dy, dz, 1.0f};
477 f32x4_t base = {x0, y0, z0, 1.0f};
478
479 // Calc parametric t value.
427 float t = (-b - inline_sqrt(disc)) / (2.0f * a); 480 float t = (-b - inline_sqrt(disc)) / (2.0f * a);
428 float px = x0 + t * dx; 481
429 float py = y0 + t * dy; 482 f32x4_t pos = base + broadcast(t) * delta;
430 float pz = z0 + t * dz; 483 f32x4_t normal = (pos - planet_pos) * planet_one_over_radius;
431 float nx = (px - planet_x_) * planet_one_over_radius_;
432 float ny = (py - planet_y_) * planet_one_over_radius_;
433 float nz = (pz - planet_z_) * planet_one_over_radius_;
434 484
435 // Misc raytrace calculations. 485 // Misc raytrace calculations.
436 float Lx = (light_x_ - px); 486 f32x4_t L = light_pos - pos;
437 float Ly = (light_y_ - py); 487 float Lq = 1.0f / inline_quick_sqrt(dot3(L, L));
438 float Lz = (light_z_ - pz); 488 L = L * broadcast(Lq);
439 float Lq = 1.0f / inline_quick_sqrt(Lx * Lx + Ly * Ly + Lz * Lz); 489 float d = dot3(L, normal);
440 Lx *= Lq; 490 f32x4_t p = diffuse * broadcast(d) + ambient;
441 Ly *= Lq; 491 float ds = -dot3(normal, planet_pole);
442 Lz *= Lq;
443 float d = (Lx * nx + Ly * ny + Lz * nz);
444 float pr = (diffuse_r_ * d) + ambient_r_;
445 float pg = (diffuse_g_ * d) + ambient_g_;
446 float pb = (diffuse_b_ * d) + ambient_b_;
447 float ds = -(nx * planet_pole_x_ +
448 ny * planet_pole_y_ +
449 nz * planet_pole_z_);
450 float ang = acos_.TableLerp(ds); 492 float ang = acos_.TableLerp(ds);
451 float v = ang * kOneOverPI; 493 float v = ang * kOneOverPI;
452 float dp = planet_equator_x_ * nx + 494 float dp = dot3(planet_equator, normal);
453 planet_equator_y_ * ny + 495 float w = dp / sinf(ang);
454 planet_equator_z_ * nz;
455 float w = dp / sin(ang);
456 if (w > 1.0f) w = 1.0f; 496 if (w > 1.0f) w = 1.0f;
457 if (w < -1.0f) w = -1.0f; 497 if (w < -1.0f) w = -1.0f;
458 float th = acos_.TableLerp(w) * kOneOver2PI; 498 float th = acos_.TableLerp(w) * kOneOver2PI;
459 float dps = planet_pole_x_equator_x_ * nx + 499 float dps = dot3(planet_pole_x_equator, normal);
460 planet_pole_x_equator_y_ * ny +
461 planet_pole_x_equator_z_ * nz;
462 float u; 500 float u;
463 if (dps < 0.0f) 501 if (dps < 0.0f)
464 u = th; 502 u = th;
465 else 503 else
466 u = 1.0f - th; 504 u = 1.0f - th;
467 505
468 // Look up daylight texel. 506 // Look up daylight texel.
469 int tx = static_cast<int>(u * base_tex_->width); 507 int tx = static_cast<int>(u * base_tex_->width);
470 int ty = static_cast<int>(v * base_tex_->height); 508 int ty = static_cast<int>(v * base_tex_->height);
471 int offset = tx + ty * base_tex_->width; 509 int offset = tx + ty * base_tex_->width;
472 uint32_t base_texel = base_tex_->pixels[offset]; 510 uint32_t base_texel = base_tex_->pixels[offset];
473 float tr = ExtractR(base_texel); 511 f32x4_t dc = ExtractRGBA(base_texel);
474 float tg = ExtractG(base_texel);
475 float tb = ExtractB(base_texel);
476
477 float ipr = 1.0f - pr;
binji 2014/05/15 18:58:37 You don't clamp this anymore?
nfullagar 2014/05/15 21:25:13 good catch! (It's only slightly visual when adjusti
478 if (ipr < 0.0f) ipr = 0.0f;
479 float ipg = 1.0f - pg;
480 if (ipg < 0.0f) ipg = 0.0f;
481 float ipb = 1.0f - pb;
482 if (ipb < 0.0f) ipb = 0.0f;
483 512
484 // Look up night texel. 513 // Look up night texel.
485 int nix = static_cast<int>(u * night_tex_->width); 514 int nix = static_cast<int>(u * night_tex_->width);
486 int niy = static_cast<int>(v * night_tex_->height); 515 int niy = static_cast<int>(v * night_tex_->height);
487 int noffset = nix + niy * night_tex_->width; 516 int noffset = nix + niy * night_tex_->width;
488 uint32_t night_texel = night_tex_->pixels[noffset]; 517 uint32_t night_texel = night_tex_->pixels[noffset];
489 float nr = ExtractR(night_texel); 518 f32x4_t nc = ExtractRGBA(night_texel);
490 float ng = ExtractG(night_texel);
491 float nb = ExtractB(night_texel);
492 519
493 // Final color value is lerp between day and night texels. 520 // Blend between daylight (dc) and nighttime (nc) color.
494 unsigned int ir = Clamp255(pr * tr + nr * ipr); 521 f32x4_t fc = dc * p + nc * (kOne - p);
495 unsigned int ig = Clamp255(pg * tg + ng * ipg); 522 uint32_t color = PackBGRA(fc);
496 unsigned int ib = Clamp255(pb * tb + nb * ipb);
497 523
498 unsigned int color = MakeBGRA(ib, ig, ir, 0xFF); 524 *pixels = color | kSolidAlpha;
499
500 *pixels = color;
501 ++pixels; 525 ++pixels;
502 } 526 }
503 } 527 }
504 528
505 // Renders a rectangular area of the screen, scan line at a time 529 // Renders a rectangular area of the screen, scan line at a time
506 void Planet::wRenderRect(int x, int y, int w, int h) { 530 void Planet::wRenderRect(int x, int y, int w, int h) {
507 for (int j = y; j < (y + h); ++j) { 531 for (int j = y; j < (y + h); ++j) {
508 this->wRenderPixelSpan(x, x + w - 1, j); 532 this->wRenderPixelSpan(x, x + w - 1, j);
509 } 533 }
510 } 534 }
(...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after
813 // Do simulation, render and present. 837 // Do simulation, render and present.
814 earth.Update(); 838 earth.Update();
815 } 839 }
816 840
817 return 0; 841 return 0;
818 } 842 }
819 843
820 // Register the function to call once the Instance Object is initialized. 844 // Register the function to call once the Instance Object is initialized.
821 // see: ppapi_simple/ps_main.h 845 // see: ppapi_simple/ps_main.h
822 PPAPI_SIMPLE_REGISTER_MAIN(example_main); 846 PPAPI_SIMPLE_REGISTER_MAIN(example_main);
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698