Chromium Code Reviews

Unified Diff: src/opts/Sk4px_NEON.h

Issue 1286093004: Refactor to put SkXfermode_opts inside SK_OPTS_NS. (Closed)
Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 5 years, 4 months ago
--- a/src/opts/Sk4px_NEON.h
+++ b/src/opts/Sk4px_NEON.h
@@ -1,164 +1,162 @@
 /*
  * Copyright 2015 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
-namespace {  // See Sk4px.h
-
-inline Sk4px Sk4px::DupPMColor(SkPMColor px) { return Sk16b((uint8x16_t)vdupq_n_u32(px)); }
+SK_ALWAYS_INLINE Sk4px Sk4px::DupPMColor(SkPMColor px) {
+    return Sk16b((uint8x16_t)vdupq_n_u32(px));
+}
 
-inline Sk4px Sk4px::Load4(const SkPMColor px[4]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load4(const SkPMColor px[4]) {
     return Sk16b((uint8x16_t)vld1q_u32(px));
 }
-inline Sk4px Sk4px::Load2(const SkPMColor px[2]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load2(const SkPMColor px[2]) {
     uint32x2_t px2 = vld1_u32(px);
     return Sk16b((uint8x16_t)vcombine_u32(px2, px2));
 }
-inline Sk4px Sk4px::Load1(const SkPMColor px[1]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load1(const SkPMColor px[1]) {
     return Sk16b((uint8x16_t)vdupq_n_u32(*px));
 }
 
-inline void Sk4px::store4(SkPMColor px[4]) const {
+SK_ALWAYS_INLINE void Sk4px::store4(SkPMColor px[4]) const {
     vst1q_u32(px, (uint32x4_t)this->fVec);
 }
-inline void Sk4px::store2(SkPMColor px[2]) const {
+SK_ALWAYS_INLINE void Sk4px::store2(SkPMColor px[2]) const {
     vst1_u32(px, (uint32x2_t)vget_low_u8(this->fVec));
 }
-inline void Sk4px::store1(SkPMColor px[1]) const {
+SK_ALWAYS_INLINE void Sk4px::store1(SkPMColor px[1]) const {
     vst1q_lane_u32(px, (uint32x4_t)this->fVec, 0);
 }
 
-inline Sk4px::Wide Sk4px::widenLo() const {
+SK_ALWAYS_INLINE Sk4px::Wide Sk4px::widenLo() const {
     return Sk16h(vmovl_u8(vget_low_u8 (this->fVec)),
                  vmovl_u8(vget_high_u8(this->fVec)));
 }
 
-inline Sk4px::Wide Sk4px::widenHi() const {
+SK_ALWAYS_INLINE Sk4px::Wide Sk4px::widenHi() const {
     return Sk16h(vshll_n_u8(vget_low_u8 (this->fVec), 8),
                  vshll_n_u8(vget_high_u8(this->fVec), 8));
 }
 
-inline Sk4px::Wide Sk4px::widenLoHi() const {
+SK_ALWAYS_INLINE Sk4px::Wide Sk4px::widenLoHi() const {
     auto zipped = vzipq_u8(this->fVec, this->fVec);
     return Sk16h((uint16x8_t)zipped.val[0],
                  (uint16x8_t)zipped.val[1]);
 }
 
-inline Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const {
+SK_ALWAYS_INLINE Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const {
     return Sk16h(vmull_u8(vget_low_u8 (this->fVec), vget_low_u8 (other.fVec)),
                  vmull_u8(vget_high_u8(this->fVec), vget_high_u8(other.fVec)));
 }
 
-inline Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const {
+SK_ALWAYS_INLINE Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const {
     const Sk4px::Wide o(other);  // Should be no code, but allows us to access fLo, fHi.
     return Sk16b(vcombine_u8(vaddhn_u16(this->fLo.fVec, o.fLo.fVec),
                              vaddhn_u16(this->fHi.fVec, o.fHi.fVec)));
 }
 
-inline Sk4px Sk4px::alphas() const {
+SK_ALWAYS_INLINE Sk4px Sk4px::alphas() const {
     auto as = vshrq_n_u32((uint32x4_t)fVec, SK_A32_SHIFT);  // ___3 ___2 ___1 ___0
     return Sk16b((uint8x16_t)vmulq_n_u32(as, 0x01010101));  // 3333 2222 1111 0000
 }
 
-inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
     uint8x16_t a8 = vdupq_n_u8(0);                           // ____ ____ ____ ____
     a8 = vld1q_lane_u8(a+0, a8,  0);                         // ____ ____ ____ ___0
     a8 = vld1q_lane_u8(a+1, a8,  4);                         // ____ ____ ___1 ___0
     a8 = vld1q_lane_u8(a+2, a8,  8);                         // ____ ___2 ___1 ___0
     a8 = vld1q_lane_u8(a+3, a8, 12);                         // ___3 ___2 ___1 ___0
     auto a32 = (uint32x4_t)a8;                               //
     return Sk16b((uint8x16_t)vmulq_n_u32(a32, 0x01010101));  // 3333 2222 1111 0000
 }
 
-inline Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) {
    uint8x16_t a8 = vdupq_n_u8(0);                           // ____ ____ ____ ____
    a8 = vld1q_lane_u8(a+0, a8,  0);                         // ____ ____ ____ ___0
    a8 = vld1q_lane_u8(a+1, a8,  4);                         // ____ ____ ___1 ___0
    auto a32 = (uint32x4_t)a8;                               //
    return Sk16b((uint8x16_t)vmulq_n_u32(a32, 0x01010101));  // ____ ____ 1111 0000
 }
 
-inline Sk4px Sk4px::zeroColors() const {
+SK_ALWAYS_INLINE Sk4px Sk4px::zeroColors() const {
     return Sk16b(vandq_u8(this->fVec, (uint8x16_t)vdupq_n_u32(0xFF << SK_A32_SHIFT)));
 }
 
-inline Sk4px Sk4px::zeroAlphas() const {
+SK_ALWAYS_INLINE Sk4px Sk4px::zeroAlphas() const {
     // vbic(a,b) == a & ~b
     return Sk16b(vbicq_u8(this->fVec, (uint8x16_t)vdupq_n_u32(0xFF << SK_A32_SHIFT)));
 }
 
-static inline uint8x16_t widen_to_8888(uint16x4_t v) {
+static SK_ALWAYS_INLINE uint8x16_t widen_to_8888(uint16x4_t v) {
     // RGB565 format: |R....|G.....|B....|
     //           Bit: 16    11     5    0
 
     // First get each pixel into its own 32-bit lane.
     //      v ==                     rgb3 rgb2 rgb1 rgb0
     // spread == 0000 rgb3 0000 rgb2 0000 rgb1 0000 rgb0
     uint32x4_t spread = vmovl_u16(v);
 
     // Get each color independently, still in 565 precison but down at bit 0.
     auto r5 = vshrq_n_u32(spread, 11),
          g6 = vandq_u32(vdupq_n_u32(63), vshrq_n_u32(spread, 5)),
         b5 = vandq_u32(vdupq_n_u32(31), spread);
 
     // Scale 565 precision up to 8-bit each, filling low 323 bits with high bits of each component.
     auto r8 = vorrq_u32(vshlq_n_u32(r5, 3), vshrq_n_u32(r5, 2)),
          g8 = vorrq_u32(vshlq_n_u32(g6, 2), vshrq_n_u32(g6, 4)),
          b8 = vorrq_u32(vshlq_n_u32(b5, 3), vshrq_n_u32(b5, 2));
 
     // Now put all the 8-bit components into SkPMColor order.
     return (uint8x16_t)vorrq_u32(vshlq_n_u32(r8, SK_R32_SHIFT),  // TODO: one shift is zero...
                        vorrq_u32(vshlq_n_u32(g8, SK_G32_SHIFT),
                        vorrq_u32(vshlq_n_u32(b8, SK_B32_SHIFT),
                                  vdupq_n_u32(0xFF << SK_A32_SHIFT))));
 }
 
-static inline uint16x4_t narrow_to_565(uint8x16_t w8x16) {
+static SK_ALWAYS_INLINE uint16x4_t narrow_to_565(uint8x16_t w8x16) {
     uint32x4_t w = (uint32x4_t)w8x16;
 
     // Extract out top RGB 565 bits of each pixel, with no rounding.
     auto r5 = vandq_u32(vdupq_n_u32(31), vshrq_n_u32(w, SK_R32_SHIFT + 3)),
          g6 = vandq_u32(vdupq_n_u32(63), vshrq_n_u32(w, SK_G32_SHIFT + 2)),
          b5 = vandq_u32(vdupq_n_u32(31), vshrq_n_u32(w, SK_B32_SHIFT + 3));
 
     // Now put the bits in place in the low 16-bits of each 32-bit lane.
     auto spread = vorrq_u32(vshlq_n_u32(r5, 11),
                   vorrq_u32(vshlq_n_u32(g6, 5),
                             b5));
 
     // Pack the low 16-bits of our 128-bit register down into a 64-bit register.
     // spread == 0000 rgb3 0000 rgb2 0000 rgb1 0000 rgb0
     //      v ==                     rgb3 rgb2 rgb1 rgb0
     auto v = vmovn_u32(spread);
     return v;
 }
 
 
-inline Sk4px Sk4px::Load4(const SkPMColor16 src[4]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load4(const SkPMColor16 src[4]) {
     return Sk16b(widen_to_8888(vld1_u16(src)));
 }
-inline Sk4px Sk4px::Load2(const SkPMColor16 src[2]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load2(const SkPMColor16 src[2]) {
     auto src2 = ((uint32_t)src[0]      )
               | ((uint32_t)src[1] << 16);
     return Sk16b(widen_to_8888(vcreate_u16(src2)));
 }
-inline Sk4px Sk4px::Load1(const SkPMColor16 src[1]) {
+SK_ALWAYS_INLINE Sk4px Sk4px::Load1(const SkPMColor16 src[1]) {
    return Sk16b(widen_to_8888(vcreate_u16(src[0])));
 }
 
-inline void Sk4px::store4(SkPMColor16 dst[4]) const {
+SK_ALWAYS_INLINE void Sk4px::store4(SkPMColor16 dst[4]) const {
     vst1_u16(dst, narrow_to_565(this->fVec));
 }
-inline void Sk4px::store2(SkPMColor16 dst[2]) const {
+SK_ALWAYS_INLINE void Sk4px::store2(SkPMColor16 dst[2]) const {
     auto v = narrow_to_565(this->fVec);
     dst[0] = vget_lane_u16(v, 0);
     dst[1] = vget_lane_u16(v, 1);
 }
-inline void Sk4px::store1(SkPMColor16 dst[1]) const {
+SK_ALWAYS_INLINE void Sk4px::store1(SkPMColor16 dst[1]) const {
     dst[0] = vget_lane_u16(narrow_to_565(this->fVec), 0);
 }
 
-}  // namespace
-
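The patch swaps plain inline for Skia's SK_ALWAYS_INLINE macro and drops the anonymous-namespace wrapper, presumably so these definitions can live inside SK_OPTS_NS as the issue title describes. As a rough sketch only (not a copy of Skia's actual configuration headers), a force-inline macro of this kind is typically defined along these lines:

// Illustrative sketch: Skia defines SK_ALWAYS_INLINE in its own config headers.
// This only shows the usual shape of such a macro, per-compiler.
#if defined(_MSC_VER)
    #define SK_ALWAYS_INLINE __forceinline
#else
    #define SK_ALWAYS_INLINE inline __attribute__((always_inline))
#endif

Forcing inlining matters here because each of these functions wraps only one or two NEON intrinsics and is called from tight per-pixel loops.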
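The intrinsics in widen_to_8888() and narrow_to_565() are a vectorized form of ordinary 565/8888 bit manipulation, four pixels at a time. A scalar sketch of the same math may make the shifts easier to follow; the helper names are hypothetical and the channel positions are hard-coded (A=24, R=16, G=8, B=0) instead of using SK_R32_SHIFT and friends:

#include <cstdint>

// One pixel of the widen_to_8888() math: expand 565 to 8888 by replicating
// each channel's high bits into its low bits, e.g. 5-bit 31 -> 8-bit 255.
static uint32_t widen_565_to_8888(uint16_t rgb565) {
    uint32_t r5 =  rgb565 >> 11,        // top 5 bits
             g6 = (rgb565 >>  5) & 63,  // middle 6 bits
             b5 =  rgb565        & 31;  // low 5 bits
    uint32_t r8 = (r5 << 3) | (r5 >> 2),
             g8 = (g6 << 2) | (g6 >> 4),
             b8 = (b5 << 3) | (b5 >> 2);
    return (0xFFu << 24) | (r8 << 16) | (g8 << 8) | b8;  // opaque alpha
}

// One pixel of the narrow_to_565() math: keep the top 5/6/5 bits of R/G/B,
// with no rounding, and pack them into a 16-bit value.
static uint16_t narrow_8888_to_565(uint32_t argb) {
    uint32_t r5 = (argb >> 19) & 31,
             g6 = (argb >> 10) & 63,
             b5 = (argb >>  3) & 31;
    return (uint16_t)((r5 << 11) | (g6 << 5) | b5);
}

Similarly, the vmulq_n_u32(..., 0x01010101) steps in alphas() and Load4Alphas() are just byte broadcasts: multiplying a 32-bit lane holding 0x000000A3 by 0x01010101 yields 0xA3A3A3A3, spreading each pixel's alpha across all four of its channels.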
