Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/codec/SkSwizzler.cpp

Issue 1656543002: NEON for table lookups? (Closed) Base URL: https://skia.googlesource.com/skia.git@index
Patch Set: Use VTBL and OR Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « dm/DM.cpp ('k') | src/opts/SkBlitRow_opts_arm.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkCodecPriv.h" 8 #include "SkCodecPriv.h"
9 #include "SkColorPriv.h" 9 #include "SkColorPriv.h"
10 #include "SkOpts.h" 10 #include "SkOpts.h"
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
213 index = (currByte >> (8 - bpp - bitIndex)) & mask; 213 index = (currByte >> (8 - bpp - bitIndex)) & mask;
214 dst[x] = ctable[index]; 214 dst[x] = ctable[index];
215 } 215 }
216 } 216 }
217 217
218 // kIndex 218 // kIndex
219 219
220 static void swizzle_index_to_n32( 220 static void swizzle_index_to_n32(
221 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, 221 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
222 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { 222 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
223 #if defined(SK_ARM_HAS_NEON)
224 uint32_t* dst = (uint32_t*) dstRow;
225 src += offset;
223 226
227 while (dstWidth >= 16) {
228 // Table registers
229 uint8x16x4_t t0, t1;
230 uint8x8x4_t tr, tg, tb, ta;
231
232 // Indices into table
233 uint8x16_t indices = vld1q_u8(src);
234
235 // Pixel output registers
236 uint8x16x4_t rgba;
237 rgba.val[0] = vdupq_n_u8(0);
238 rgba.val[1] = vdupq_n_u8(0);
239 rgba.val[2] = vdupq_n_u8(0);
240 rgba.val[3] = vdupq_n_u8(0);
241
242 const uint32_t* table = ctable;
243 const int numColors = 256;
244 const int numColorsPerLoop = 32;
245 for (int j = 0; j < numColors / numColorsPerLoop; j++) {
246 // Load a separate color table for each of r, g, b, a
247 t0 = vld4q_u8((const uint8_t*) (table + 0)); // rgba
mtklein 2016/02/01 16:30:36 Where's our weak link? I don't suppose things get
248 t1 = vld4q_u8((const uint8_t*) (table + 16)); // RGBA
249 SkTSwap(t0.val[1], t1.val[0]); // rRba, gGBA
250 SkTSwap(t0.val[3], t1.val[2]); // rRbB, gGaA
251 tr = *(((uint8x8x4_t*) &t0) + 0); // rR
252 tb = *(((uint8x8x4_t*) &t0) + 1); // bB
253 tg = *(((uint8x8x4_t*) &t1) + 0); // gG
254 ta = *(((uint8x8x4_t*) &t1) + 1); // aA
255
256 // Use VTBL, then OR the results together.
257 rgba.val[0] = vorrq_u8(rgba.val[0],
258 vcombine_u8(vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 0)),
259 vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 1))));
260 rgba.val[1] = vorrq_u8(rgba.val[1],
261 vcombine_u8(vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 0)),
262 vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 1))));
263 rgba.val[2] = vorrq_u8(rgba.val[2],
264 vcombine_u8(vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 0)),
265 vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 1))));
266 rgba.val[3] = vorrq_u8(rgba.val[3],
267 vcombine_u8(vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 0)),
268 vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 1))));
269
270 // Move the next set of indices into the range of the color table. Indices
271 // that are currently in range should go out of range due to underflow.
272 indices = vsubq_u8(indices, vdupq_n_u8(32));
273 table += numColorsPerLoop;
274 }
275
276 // Store output pixel values.
277 vst4q_u8((uint8_t*) dst, rgba);
278
279 src += 16;
280 dst += 16;
281 dstWidth -= 16;
282 }
283
284 for (int x = 0; x < dstWidth; x++) {
285 SkPMColor c = ctable[*src];
286 dst[x] = c;
287 src += deltaSrc;
288 }
289
290
291 #else
224 src += offset; 292 src += offset;
225 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; 293 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
226 for (int x = 0; x < dstWidth; x++) { 294 for (int x = 0; x < dstWidth; x++) {
227 SkPMColor c = ctable[*src]; 295 SkPMColor c = ctable[*src];
228 dst[x] = c; 296 dst[x] = c;
229 src += deltaSrc; 297 src += deltaSrc;
230 } 298 }
299 #endif
231 } 300 }
232 301
233 static void swizzle_index_to_n32_skipZ( 302 static void swizzle_index_to_n32_skipZ(
234 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, 303 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
235 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { 304 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
236 305
237 src += offset; 306 src += offset;
238 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; 307 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
239 for (int x = 0; x < dstWidth; x++) { 308 for (int x = 0; x < dstWidth; x++) {
240 SkPMColor c = ctable[*src]; 309 SkPMColor c = ctable[*src];
(...skipping 573 matching lines...) Expand 10 before | Expand all | Expand 10 after
814 } 883 }
815 884
816 return fAllocatedWidth; 885 return fAllocatedWidth;
817 } 886 }
818 887
819 void SkSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) { 888 void SkSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) {
820 SkASSERT(nullptr != dst && nullptr != src); 889 SkASSERT(nullptr != dst && nullptr != src);
821 fActualProc(SkTAddOffset<void>(dst, fDstOffsetBytes), src, fSwizzleWidth, fSrcBPP, 890 fActualProc(SkTAddOffset<void>(dst, fDstOffsetBytes), src, fSwizzleWidth, fSrcBPP,
822 fSampleX * fSrcBPP, fSrcOffsetUnits, fColorTable); 891 fSampleX * fSrcBPP, fSrcOffsetUnits, fColorTable);
823 } 892 }
OLDNEW
« no previous file with comments | « dm/DM.cpp ('k') | src/opts/SkBlitRow_opts_arm.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698