src/opts/SkTextureCompressor_opts.h - Issue 2264103002: compress_r11eac_blocks() required more alignment than dst has.

Side by Side Diff: src/opts/SkTextureCompressor_opts.h

Issue 2264103002: compress_r11eac_blocks() required more alignment than dst has. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Turn test back on. Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkTextureCompressor_opts_DEFINED	8 #ifndef SkTextureCompressor_opts_DEFINED

9 #define SkTextureCompressor_opts_DEFINED	9 #define SkTextureCompressor_opts_DEFINED

10	10

(...skipping 132 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
143 #if defined (SK_CPU_BENDIAN)	143 #if defined (SK_CPU_BENDIAN)

144 static inline uint64x2_t fix_endianness(uint64x2_t x) {	144 static inline uint64x2_t fix_endianness(uint64x2_t x) {

145 return x;	145 return x;

146 }	146 }

147 #else	147 #else

148 static inline uint64x2_t fix_endianness(uint64x2_t x) {	148 static inline uint64x2_t fix_endianness(uint64x2_t x) {

149 return vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(x)));	149 return vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(x)));

150 }	150 }

151 #endif	151 #endif

152	152

153 static void compress_r11eac_blocks(uint64_t* dst, const uint8_t* src, size_t rowBytes) {	153 static void compress_r11eac_blocks(uint8_t* dst, const uint8_t* src, size_t rowBytes) {

154 // Try to avoid switching between vector and non-vector ops...	154 // Try to avoid switching between vector and non-vector ops...

155 const uint8_t *const src1 = src;	155 const uint8_t *const src1 = src;

156 const uint8_t *const src2 = src + rowBytes;	156 const uint8_t *const src2 = src + rowBytes;

157 const uint8_t *const src3 = src + 2*rowBytes;	157 const uint8_t *const src3 = src + 2*rowBytes;

158 const uint8_t *const src4 = src + 3*rowBytes;	158 const uint8_t *const src4 = src + 3*rowBytes;

159 uint64_t *const dst1 = dst;	159 uint8_t *const dst1 = dst;

160 uint64_t *const dst2 = dst + 2;	160 uint8_t *const dst2 = dst + 16;

161	161

162 const uint8x16_t alphaRow1 = vld1q_u8(src1);	162 const uint8x16_t alphaRow1 = vld1q_u8(src1);

163 const uint8x16_t alphaRow2 = vld1q_u8(src2);	163 const uint8x16_t alphaRow2 = vld1q_u8(src2);

164 const uint8x16_t alphaRow3 = vld1q_u8(src3);	164 const uint8x16_t alphaRow3 = vld1q_u8(src3);

165 const uint8x16_t alphaRow4 = vld1q_u8(src4);	165 const uint8x16_t alphaRow4 = vld1q_u8(src4);

166	166

167 const uint8x16_t cmp12 = vceqq_u8(alphaRow1, alphaRow2);	167 const uint8x16_t cmp12 = vceqq_u8(alphaRow1, alphaRow2);

168 const uint8x16_t cmp34 = vceqq_u8(alphaRow3, alphaRow4);	168 const uint8x16_t cmp34 = vceqq_u8(alphaRow3, alphaRow4);

169 const uint8x16_t cmp13 = vceqq_u8(alphaRow1, alphaRow3);	169 const uint8x16_t cmp13 = vceqq_u8(alphaRow1, alphaRow3);

170	170

171 const uint8x16_t cmp = vandq_u8(vandq_u8(cmp12, cmp34), cmp13);	171 const uint8x16_t cmp = vandq_u8(vandq_u8(cmp12, cmp34), cmp13);

172 const uint8x16_t ncmp = vmvnq_u8(cmp);	172 const uint8x16_t ncmp = vmvnq_u8(cmp);

173 const uint8x16_t nAlphaRow1 = vmvnq_u8(alphaRow1);	173 const uint8x16_t nAlphaRow1 = vmvnq_u8(alphaRow1);

174 if (is_zero(ncmp)) {	174 if (is_zero(ncmp)) {

175 if (is_zero(alphaRow1)) {	175 if (is_zero(alphaRow1)) {

176 static const uint64x2_t kTransparent = { 0x0020000000002000ULL,	176 static const uint64x2_t kTransparent = { 0x0020000000002000ULL,

177 0x0020000000002000ULL } ;	177 0x0020000000002000ULL } ;

178 vst1q_u64(dst1, kTransparent);	178 vst1q_u8(dst1, vreinterpretq_u8_u64(kTransparent));

179 vst1q_u64(dst2, kTransparent);	179 vst1q_u8(dst2, vreinterpretq_u8_u64(kTransparent));

180 return;	180 return;

181 } else if (is_zero(nAlphaRow1)) {	181 } else if (is_zero(nAlphaRow1)) {

182 vst1q_u64(dst1, vreinterpretq_u64_u8(cmp));	182 vst1q_u8(dst1, cmp);

183 vst1q_u64(dst2, vreinterpretq_u64_u8(cmp));	183 vst1q_u8(dst2, cmp);

184 return;	184 return;

185 }	185 }

186 }	186 }

187	187

188 const uint8x16_t indexRow1 = convert_indices(make_index_row(alphaRow1));	188 const uint8x16_t indexRow1 = convert_indices(make_index_row(alphaRow1));

189 const uint8x16_t indexRow2 = convert_indices(make_index_row(alphaRow2));	189 const uint8x16_t indexRow2 = convert_indices(make_index_row(alphaRow2));

190 const uint8x16_t indexRow3 = convert_indices(make_index_row(alphaRow3));	190 const uint8x16_t indexRow3 = convert_indices(make_index_row(alphaRow3));

191 const uint8x16_t indexRow4 = convert_indices(make_index_row(alphaRow4));	191 const uint8x16_t indexRow4 = convert_indices(make_index_row(alphaRow4));

192	192

193 const uint64x2_t indexRow12 = vreinterpretq_u64_u8(	193 const uint64x2_t indexRow12 = vreinterpretq_u64_u8(

194 vorrq_u8(vshlq_n_u8(indexRow1, 3), indexRow2));	194 vorrq_u8(vshlq_n_u8(indexRow1, 3), indexRow2));

195 const uint64x2_t indexRow34 = vreinterpretq_u64_u8(	195 const uint64x2_t indexRow34 = vreinterpretq_u64_u8(

196 vorrq_u8(vshlq_n_u8(indexRow3, 3), indexRow4));	196 vorrq_u8(vshlq_n_u8(indexRow3, 3), indexRow4));

197	197

198 const uint32x4x2_t blockIndices = vtrnq_u32(vreinterpretq_u32_u64(indexRow12),	198 const uint32x4x2_t blockIndices = vtrnq_u32(vreinterpretq_u32_u64(indexRow12),

199 vreinterpretq_u32_u64(indexRow34));	199 vreinterpretq_u32_u64(indexRow34));

200 const uint64x2_t blockIndicesLeft = vreinterpretq_u64_u32(vrev64q_u32(blockIndices.val[0]));	200 const uint64x2_t blockIndicesLeft = vreinterpretq_u64_u32(vrev64q_u32(blockIndices.val[0]));

201 const uint64x2_t blockIndicesRight = vreinterpretq_u64_u32(vrev64q_u32(blockIndices.val[1]));	201 const uint64x2_t blockIndicesRight = vreinterpretq_u64_u32(vrev64q_u32(blockIndices.val[1]));

202	202

203 const uint64x2_t indicesLeft = fix_endianness(pack_indices(blockIndicesLeft));	203 const uint64x2_t indicesLeft = fix_endianness(pack_indices(blockIndicesLeft));

204 const uint64x2_t indicesRight = fix_endianness(pack_indices(blockIndicesRight));	204 const uint64x2_t indicesRight = fix_endianness(pack_indices(blockIndicesRight));

205	205

206 const uint64x2_t d1 = vcombine_u64(vget_low_u64(indicesLeft), vget_low_u64(indicesRight));	206 const uint64x2_t d1 = vcombine_u64(vget_low_u64(indicesLeft), vget_low_u64(indicesRight));

207 const uint64x2_t d2 = vcombine_u64(vget_high_u64(indicesLeft), vget_high_u64(indicesRight));	207 const uint64x2_t d2 = vcombine_u64(vget_high_u64(indicesLeft), vget_high_u64(indicesRight));

208 vst1q_u64(dst1, d1);	208 vst1q_u8(dst1, vreinterpretq_u8_u64(d1));

209 vst1q_u64(dst2, d2);	209 vst1q_u8(dst2, vreinterpretq_u8_u64(d2));

210 }	210 }

211	211

212 static bool compress_a8_r11eac(uint8_t* dst, const uint8_t* src,	212 static bool compress_a8_r11eac(uint8_t* dst, const uint8_t* src,

213 int width, int height, size_t rowBytes) {	213 int width, int height, size_t rowBytes) {

214	214

215 // Since we're going to operate on 4 blocks at a time, the src width	215 // Since we're going to operate on 4 blocks at a time, the src width

216 // must be a multiple of 16. However, the height only needs to be a	216 // must be a multiple of 16. However, the height only needs to be a

217 // multiple of 4	217 // multiple of 4

218 if (0 == width || 0 == height || (width % 16) != 0 || (height % 4) != 0) {	218 if (0 == width || 0 == height || (width % 16) != 0 || (height % 4) != 0) {

219 return false;	219 return false;

220 }	220 }

221	221

222 const int blocksX = width >> 2;	222 const int blocksX = width >> 2;

223 const int blocksY = height >> 2;	223 const int blocksY = height >> 2;

224	224

225 SkASSERT((blocksX % 4) == 0);	225 SkASSERT((blocksX % 4) == 0);

226	226

227 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);

228 for (int y = 0; y < blocksY; ++y) {	227 for (int y = 0; y < blocksY; ++y) {

229 for (int x = 0; x < blocksX; x+=4) {	228 for (int x = 0; x < blocksX; x+=4) {

230 // Compress it	229 // Compress it

231 compress_r11eac_blocks(encPtr, src + 4*x, rowBytes);	230 compress_r11eac_blocks(dst, src + 4*x, rowBytes);

232 encPtr += 4;	231 dst += 32;

233 }	232 }

234 src += 4 * rowBytes;	233 src += 4 * rowBytes;

235 }	234 }

236 return true;	235 return true;

237 }	236 }

238	237

239 static SkOpts::TextureCompressor texture_compressor(SkColorType ct,	238 static SkOpts::TextureCompressor texture_compressor(SkColorType ct,

240 SkTextureCompressor::Format fmt) {	239 SkTextureCompressor::Format fmt) {

241 if (ct == kAlpha_8_SkColorType && fmt == SkTextureCompressor::kR11_EAC_Format) {	240 if (ct == kAlpha_8_SkColorType && fmt == SkTextureCompressor::kR11_EAC_Format) {

242 return compress_a8_r11eac;	241 return compress_a8_r11eac;

(...skipping 15 matching lines...) Expand all Loading...
258 }	257 }

259 static bool fill_block_dimensions(SkTextureCompressor::Format, int, int) {	258 static bool fill_block_dimensions(SkTextureCompressor::Format, int, int) {

260 return false;	259 return false;

261 }	260 }

262	261

263 #endif	262 #endif

264	263

265 } // namespace SK_OPTS_NS	264 } // namespace SK_OPTS_NS

266	265

267 #endif//SkTextureCompressor_opts_DEFINED	266 #endif//SkTextureCompressor_opts_DEFINED

OLD	NEW

« no previous file with comments | « infra/bots/recipes/swarm_test.expected/Test-Android-GCC-Nexus6-GPU-Adreno420-Arm7-Debug.json ('k') | no next file » | no next file with comments »