OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkPx_neon_DEFINED | 8 #ifndef SkPx_neon_DEFINED |
9 #define SkPx_neon_DEFINED | 9 #define SkPx_neon_DEFINED |
10 | 10 |
11 // When we have NEON, we like to work 8 pixels at a time. | 11 // When we have NEON, we like to work 8 pixels at a time. |
12 // This lets us exploit vld4/vst4 and represent SkPx as planar uint8x8x4_t, | 12 // This lets us exploit vld4/vst4 and represent SkPx as planar uint8x8x4_t, |
13 // Wide as planar uint16x8x4_t, and Alpha as a single uint8x8_t plane. | 13 // Wide as planar uint16x8x4_t, and Alpha as a single uint8x8_t plane. |
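Note: the "planar" layout above is what vld4/vst4 give you for free: vld4_u8 deinterleaves eight 4-byte pixels into four uint8x8_t planes (byte 0 of every pixel in val[0], ..., alpha in val[3]), and vst4_u8 re-interleaves on the way out. A minimal sketch of that round trip, assuming a NEON target:

    #include <arm_neon.h>
    #include <stdint.h>

    void planar_round_trip(const uint32_t src[8], uint32_t dst[8]) {
        uint8x8x4_t planes = vld4_u8((const uint8_t*)src);  // deinterleave 8 pixels
        uint8x8_t alpha = planes.val[3];                    // all 8 alpha bytes
        (void)alpha;
        vst4_u8((uint8_t*)dst, planes);                     // re-interleave on store
    }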
14 | 14 |
15 struct SkPx_neon { | 15 namespace neon { |
| 16 |
| 17 struct SkPx { |
16 static const int N = 8; | 18 static const int N = 8; |
17 | 19 |
18 uint8x8x4_t fVec; | 20 uint8x8x4_t fVec; |
19 SkPx_neon(uint8x8x4_t vec) : fVec(vec) {} | 21 SkPx(uint8x8x4_t vec) : fVec(vec) {} |
20 | 22 |
21 static SkPx_neon Dup(uint32_t px) { return vld4_dup_u8((const uint8_t*)&px); } | 23 static SkPx Dup(uint32_t px) { return vld4_dup_u8((const uint8_t*)&px); } |
22 static SkPx_neon Load(const uint32_t* px) { return vld4_u8((const uint8_t*)px); } | 24 static SkPx Load(const uint32_t* px) { return vld4_u8((const uint8_t*)px); } |
23 static SkPx_neon Load(const uint32_t* px, int n) { | 25 static SkPx Load(const uint32_t* px, int n) { |
24 SkASSERT(0 < n && n < 8); | 26 SkASSERT(0 < n && n < 8); |
25 uint8x8x4_t v = vld4_dup_u8((const uint8_t*)px); // n>=1, so start all lanes with pixel 0. | 27 uint8x8x4_t v = vld4_dup_u8((const uint8_t*)px); // n>=1, so start all lanes with pixel 0. |
26 switch (n) { | 28 switch (n) { |
27 case 7: v = vld4_lane_u8((const uint8_t*)(px+6), v, 6); // fall through | 29 case 7: v = vld4_lane_u8((const uint8_t*)(px+6), v, 6); // fall through |
28 case 6: v = vld4_lane_u8((const uint8_t*)(px+5), v, 5); // fall through | 30 case 6: v = vld4_lane_u8((const uint8_t*)(px+5), v, 5); // fall through |
29 case 5: v = vld4_lane_u8((const uint8_t*)(px+4), v, 4); // fall through | 31 case 5: v = vld4_lane_u8((const uint8_t*)(px+4), v, 4); // fall through |
30 case 4: v = vld4_lane_u8((const uint8_t*)(px+3), v, 3); // fall through | 32 case 4: v = vld4_lane_u8((const uint8_t*)(px+3), v, 3); // fall through |
31 case 3: v = vld4_lane_u8((const uint8_t*)(px+2), v, 2); // fall through | 33 case 3: v = vld4_lane_u8((const uint8_t*)(px+2), v, 2); // fall through |
32 case 2: v = vld4_lane_u8((const uint8_t*)(px+1), v, 1); | 34 case 2: v = vld4_lane_u8((const uint8_t*)(px+1), v, 1); |
33 } | 35 } |
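Note: the switch above is a Duff's-device-style partial load: vld4_dup_u8 first fills all eight lanes with pixel 0, then each case overwrites one lane from the top down and falls through, so after entering at case n, lanes 1..n-1 hold their real pixels. A scalar model of the same control flow (illustrative only):

    void load_n_model(const uint32_t* px, int n, uint32_t v[8]) {
        for (int i = 0; i < 8; i++) v[i] = px[0];       // vld4_dup_u8: all lanes = pixel 0
        for (int i = n - 1; i >= 1; i--) v[i] = px[i];  // cases n..2: patch one lane each
    }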
(...skipping 67 matching lines...)
101 } | 103 } |
102 template <int bits> Wide shr() const { | 104 template <int bits> Wide shr() const { |
103 return (uint16x8x4_t) {{ | 105 return (uint16x8x4_t) {{ |
104 vshrq_n_u16(fVec.val[0], bits), | 106 vshrq_n_u16(fVec.val[0], bits), |
105 vshrq_n_u16(fVec.val[1], bits), | 107 vshrq_n_u16(fVec.val[1], bits), |
106 vshrq_n_u16(fVec.val[2], bits), | 108 vshrq_n_u16(fVec.val[2], bits), |
107 vshrq_n_u16(fVec.val[3], bits), | 109 vshrq_n_u16(fVec.val[3], bits), |
108 }}; | 110 }}; |
109 } | 111 } |
110 | 112 |
111 SkPx_neon addNarrowHi(const SkPx_neon& o) const { | 113 SkPx addNarrowHi(const SkPx& o) const { |
112 return (uint8x8x4_t) {{ | 114 return (uint8x8x4_t) {{ |
113 vshrn_n_u16(vaddw_u8(fVec.val[0], o.fVec.val[0]), 8), | 115 vshrn_n_u16(vaddw_u8(fVec.val[0], o.fVec.val[0]), 8), |
114 vshrn_n_u16(vaddw_u8(fVec.val[1], o.fVec.val[1]), 8), | 116 vshrn_n_u16(vaddw_u8(fVec.val[1], o.fVec.val[1]), 8), |
115 vshrn_n_u16(vaddw_u8(fVec.val[2], o.fVec.val[2]), 8), | 117 vshrn_n_u16(vaddw_u8(fVec.val[2], o.fVec.val[2]), 8), |
116 vshrn_n_u16(vaddw_u8(fVec.val[3], o.fVec.val[3]), 8), | 118 vshrn_n_u16(vaddw_u8(fVec.val[3], o.fVec.val[3]), 8), |
117 }}; | 119 }}; |
118 } | 120 } |
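Note: per channel, addNarrowHi computes (wide + o) >> 8, narrowed back to 8 bits, in two instructions: vaddw_u8 widening-adds the 8-bit operand into the 16-bit lanes, then vshrn_n_u16 shifts right by 8 while narrowing. Per lane, with 16-bit w and 8-bit o:

    uint8_t result = (uint8_t)((w + o) >> 8);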
119 }; | 121 }; |
120 | 122 |
121 Alpha alpha() const { return fVec.val[3]; } | 123 Alpha alpha() const { return fVec.val[3]; } |
122 | 124 |
123 Wide widenLo() const { | 125 Wide widenLo() const { |
124 return (uint16x8x4_t) {{ | 126 return (uint16x8x4_t) {{ |
125 vmovl_u8(fVec.val[0]), | 127 vmovl_u8(fVec.val[0]), |
126 vmovl_u8(fVec.val[1]), | 128 vmovl_u8(fVec.val[1]), |
127 vmovl_u8(fVec.val[2]), | 129 vmovl_u8(fVec.val[2]), |
128 vmovl_u8(fVec.val[3]), | 130 vmovl_u8(fVec.val[3]), |
129 }}; | 131 }}; |
130 } | 132 } |
131 // TODO: these two can probably be done faster. | 133 // TODO: these two can probably be done faster. |
132 Wide widenHi() const { return this->widenLo().shl<8>(); } | 134 Wide widenHi() const { return this->widenLo().shl<8>(); } |
133 Wide widenLoHi() const { return this->widenLo() + this->widenHi(); } | 135 Wide widenLoHi() const { return this->widenLo() + this->widenHi(); } |
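Note: per channel, widenLo, widenHi, and widenLoHi compute x, x << 8, and x * 0x101 respectively; the last maps 0xFF to 0xFFFF, i.e. it rescales an 8-bit value across the full 16-bit range. In scalar form:

    uint16_t lo   = x;          // widenLo
    uint16_t hi   = x << 8;     // widenHi
    uint16_t lohi = x * 0x101;  // widenLoHi: 0xFF -> 0xFFFF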
134 | 136 |
135 SkPx_neon operator+(const SkPx_neon& o) const { | 137 SkPx operator+(const SkPx& o) const { |
136 return (uint8x8x4_t) {{ | 138 return (uint8x8x4_t) {{ |
137 vadd_u8(fVec.val[0], o.fVec.val[0]), | 139 vadd_u8(fVec.val[0], o.fVec.val[0]), |
138 vadd_u8(fVec.val[1], o.fVec.val[1]), | 140 vadd_u8(fVec.val[1], o.fVec.val[1]), |
139 vadd_u8(fVec.val[2], o.fVec.val[2]), | 141 vadd_u8(fVec.val[2], o.fVec.val[2]), |
140 vadd_u8(fVec.val[3], o.fVec.val[3]), | 142 vadd_u8(fVec.val[3], o.fVec.val[3]), |
141 }}; | 143 }}; |
142 } | 144 } |
143 SkPx_neon operator-(const SkPx_neon& o) const { | 145 SkPx operator-(const SkPx& o) const { |
144 return (uint8x8x4_t) {{ | 146 return (uint8x8x4_t) {{ |
145 vsub_u8(fVec.val[0], o.fVec.val[0]), | 147 vsub_u8(fVec.val[0], o.fVec.val[0]), |
146 vsub_u8(fVec.val[1], o.fVec.val[1]), | 148 vsub_u8(fVec.val[1], o.fVec.val[1]), |
147 vsub_u8(fVec.val[2], o.fVec.val[2]), | 149 vsub_u8(fVec.val[2], o.fVec.val[2]), |
148 vsub_u8(fVec.val[3], o.fVec.val[3]), | 150 vsub_u8(fVec.val[3], o.fVec.val[3]), |
149 }}; | 151 }}; |
150 } | 152 } |
151 SkPx_neon saturatedAdd(const SkPx_neon& o) const { | 153 SkPx saturatedAdd(const SkPx& o) const { |
152 return (uint8x8x4_t) {{ | 154 return (uint8x8x4_t) {{ |
153 vqadd_u8(fVec.val[0], o.fVec.val[0]), | 155 vqadd_u8(fVec.val[0], o.fVec.val[0]), |
154 vqadd_u8(fVec.val[1], o.fVec.val[1]), | 156 vqadd_u8(fVec.val[1], o.fVec.val[1]), |
155 vqadd_u8(fVec.val[2], o.fVec.val[2]), | 157 vqadd_u8(fVec.val[2], o.fVec.val[2]), |
156 vqadd_u8(fVec.val[3], o.fVec.val[3]), | 158 vqadd_u8(fVec.val[3], o.fVec.val[3]), |
157 }}; | 159 }}; |
158 } | 160 } |
159 | 161 |
160 Wide operator*(const Alpha& a) const { | 162 Wide operator*(const Alpha& a) const { |
161 return (uint16x8x4_t) {{ | 163 return (uint16x8x4_t) {{ |
162 vmull_u8(fVec.val[0], a.fA), | 164 vmull_u8(fVec.val[0], a.fA), |
163 vmull_u8(fVec.val[1], a.fA), | 165 vmull_u8(fVec.val[1], a.fA), |
164 vmull_u8(fVec.val[2], a.fA), | 166 vmull_u8(fVec.val[2], a.fA), |
165 vmull_u8(fVec.val[3], a.fA), | 167 vmull_u8(fVec.val[3], a.fA), |
166 }}; | 168 }}; |
167 } | 169 } |
168 SkPx_neon approxMulDiv255(const Alpha& a) const { | 170 SkPx approxMulDiv255(const Alpha& a) const { |
169 return (*this * a).addNarrowHi(*this); | 171 return (*this * a).addNarrowHi(*this); |
170 } | 172 } |
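Note: (*this * a).addNarrowHi(*this) evaluates (x*a + x) >> 8 = (x*(a+1)) >> 8 per channel, the standard cheap approximation of x*a/255. A quick scalar check that it stays within 1 of the rounded exact result:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        int max_err = 0;
        for (int x = 0; x < 256; x++)
        for (int a = 0; a < 256; a++) {
            int approx = (x*a + x) >> 8;     // what approxMulDiv255 computes
            int exact  = (x*a + 127) / 255;  // x*a/255, rounded
            int err    = approx > exact ? approx - exact : exact - approx;
            if (err > max_err) max_err = err;
        }
        printf("max error: %d\n", max_err);  // prints 1
        return 0;
    }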
171 | 173 |
172 SkPx_neon addAlpha(const Alpha& a) const { | 174 SkPx addAlpha(const Alpha& a) const { |
173 return (uint8x8x4_t) {{ | 175 return (uint8x8x4_t) {{ |
174 fVec.val[0], | 176 fVec.val[0], |
175 fVec.val[1], | 177 fVec.val[1], |
176 fVec.val[2], | 178 fVec.val[2], |
177 vadd_u8(fVec.val[3], a.fA), | 179 vadd_u8(fVec.val[3], a.fA), |
178 }}; | 180 }}; |
179 } | 181 } |
180 }; | 182 }; |
181 typedef SkPx_neon SkPx; | 183 |
| 184 } // namespace neon |
| 185 |
| 186 typedef neon::SkPx SkPx; |
182 | 187 |
183 #endif//SkPx_neon_DEFINED | 188 #endif//SkPx_neon_DEFINED |
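For context, a hedged sketch of how the visible API composes (hypothetical caller; the store side of the API lives in the 67 elided lines above):

    #include <arm_neon.h>
    #include <stdint.h>

    // Multiply all four planes of 8 pixels by their own alpha plane,
    // x*a/255 approximated per channel as (x*a + x) >> 8.
    static neon::SkPx scale_by_own_alpha(const uint32_t* src) {
        neon::SkPx px = neon::SkPx::Load(src);  // vld4: 8 pixels -> 4 planes
        return px.approxMulDiv255(px.alpha());  // note: alpha scales itself too
    }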