OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkCodecPriv.h" | 8 #include "SkCodecPriv.h" |
9 #include "SkColorPriv.h" | 9 #include "SkColorPriv.h" |
10 #include "SkOpts.h" | 10 #include "SkOpts.h" |
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
213 index = (currByte >> (8 - bpp - bitIndex)) & mask; | 213 index = (currByte >> (8 - bpp - bitIndex)) & mask; |
214 dst[x] = ctable[index]; | 214 dst[x] = ctable[index]; |
215 } | 215 } |
216 } | 216 } |
217 | 217 |
218 // kIndex | 218 // kIndex |
219 | 219 |
220 static void swizzle_index_to_n32( | 220 static void swizzle_index_to_n32( |
221 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, | 221 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, |
222 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { | 222 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { |
223 #if defined(SK_ARM_HAS_NEON) | |
224 uint32_t* dst = (uint32_t*) dstRow; | |
225 src += offset; | |
223 | 226 |
227 while (dstWidth >= 16) { | |
228 // Table registers | |
229 uint8x16x4_t t0, t1; | |
230 uint8x8x4_t tr, tg, tb, ta; | |
231 | |
232 // Indices into table | |
233 uint8x16_t indices = vld1q_u8(src); | |
234 | |
235 // Pixel output registers | |
236 uint8x16x4_t rgba; | |
237 rgba.val[0] = vdupq_n_u8(0); | |
238 rgba.val[1] = vdupq_n_u8(0); | |
239 rgba.val[2] = vdupq_n_u8(0); | |
240 rgba.val[3] = vdupq_n_u8(0); | |
241 | |
242 const uint32_t* table = ctable; | |
243 const int numColors = 256; | |
244 const int numColorsPerLoop = 32; | |
245 for (int j = 0; j < numColors / numColorsPerLoop; j++) { | |
246 // Load a separate color table for each of r, g, b, a | |
247 t0 = vld4q_u8((const uint8_t*) (table + 0)); // rgba | |
mtklein
2016/02/01 16:30:36
Where's our weak link? I don't suppose things get
| |
248 t1 = vld4q_u8((const uint8_t*) (table + 16)); // RGBA | |
249 SkTSwap(t0.val[1], t1.val[0]); // rRba, gGBA | |
250 SkTSwap(t0.val[3], t1.val[2]); // rRbB, gGaA | |
251 tr = *(((uint8x8x4_t*) &t0) + 0); // rR | |
252 tb = *(((uint8x8x4_t*) &t0) + 1); // bB | |
253 tg = *(((uint8x8x4_t*) &t1) + 0); // gG | |
254 ta = *(((uint8x8x4_t*) &t1) + 1); // aA | |
255 | |
256 // Use VTBL, then OR the results together. | |
257 rgba.val[0] = vorrq_u8(rgba.val[0], | |
258 vcombine_u8(vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 0)), | |
259 vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 1)))); | |
260 rgba.val[1] = vorrq_u8(rgba.val[1], | |
261 vcombine_u8(vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 0)), | |
262 vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 1)))); | |
263 rgba.val[2] = vorrq_u8(rgba.val[2], | |
264 vcombine_u8(vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 0)), | |
265 vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 1)))); | |
266 rgba.val[3] = vorrq_u8(rgba.val[3], | |
267 vcombine_u8(vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 0)), | |
268 vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 1)))); | |
269 | |
270 // Move the next set of indices into the range of the color table. Indices | |
271 // that are currently in range should go out of range due to underflow. | |
272 indices = vsubq_u8(indices, vdupq_n_u8(32)); | |
273 table += numColorsPerLoop; | |
274 } | |
275 | |
276 // Store output pixel values. | |
277 vst4q_u8((uint8_t*) dst, rgba); | |
278 | |
279 src += 16; | |
280 dst += 16; | |
281 dstWidth -= 16; | |
282 } | |
283 | |
284 for (int x = 0; x < dstWidth; x++) { | |
285 SkPMColor c = ctable[*src]; | |
286 dst[x] = c; | |
287 src += deltaSrc; | |
288 } | |
289 | |
290 | |
291 #else | |
224 src += offset; | 292 src += offset; |
225 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; | 293 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; |
226 for (int x = 0; x < dstWidth; x++) { | 294 for (int x = 0; x < dstWidth; x++) { |
227 SkPMColor c = ctable[*src]; | 295 SkPMColor c = ctable[*src]; |
228 dst[x] = c; | 296 dst[x] = c; |
229 src += deltaSrc; | 297 src += deltaSrc; |
230 } | 298 } |
299 #endif | |
231 } | 300 } |
232 | 301 |
233 static void swizzle_index_to_n32_skipZ( | 302 static void swizzle_index_to_n32_skipZ( |
234 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, | 303 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, |
235 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { | 304 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { |
236 | 305 |
237 src += offset; | 306 src += offset; |
238 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; | 307 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; |
239 for (int x = 0; x < dstWidth; x++) { | 308 for (int x = 0; x < dstWidth; x++) { |
240 SkPMColor c = ctable[*src]; | 309 SkPMColor c = ctable[*src]; |
(...skipping 573 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
814 } | 883 } |
815 | 884 |
816 return fAllocatedWidth; | 885 return fAllocatedWidth; |
817 } | 886 } |
818 | 887 |
819 void SkSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) { | 888 void SkSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) { |
820 SkASSERT(nullptr != dst && nullptr != src); | 889 SkASSERT(nullptr != dst && nullptr != src); |
821 fActualProc(SkTAddOffset<void>(dst, fDstOffsetBytes), src, fSwizzleWidth, fSrcBPP, | 890 fActualProc(SkTAddOffset<void>(dst, fDstOffsetBytes), src, fSwizzleWidth, fSrcBPP, |
822 fSampleX * fSrcBPP, fSrcOffsetUnits, fColorTable); | 891 fSampleX * fSrcBPP, fSrcOffsetUnits, fColorTable); |
823 } | 892 } |
OLD | NEW |