Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkCodecPriv.h" | 8 #include "SkCodecPriv.h" |
| 9 #include "SkColorPriv.h" | 9 #include "SkColorPriv.h" |
| 10 #include "SkOpts.h" | 10 #include "SkOpts.h" |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 213 index = (currByte >> (8 - bpp - bitIndex)) & mask; | 213 index = (currByte >> (8 - bpp - bitIndex)) & mask; |
| 214 dst[x] = ctable[index]; | 214 dst[x] = ctable[index]; |
| 215 } | 215 } |
| 216 } | 216 } |
| 217 | 217 |
| 218 // kIndex | 218 // kIndex |
| 219 | 219 |
| 220 static void swizzle_index_to_n32( | 220 static void swizzle_index_to_n32( |
| 221 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, | 221 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, |
| 222 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { | 222 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { |
| 223 #if defined(SK_ARM_HAS_NEON) | |
| 224 uint32_t* dst = (uint32_t*) dstRow; | |
| 225 src += offset; | |
| 223 | 226 |
| 227 while (dstWidth >= 16) { | |
| 228 // Table registers | |
| 229 uint8x16x4_t t0, t1; | |
| 230 uint8x8x4_t tr, tg, tb, ta; | |
| 231 | |
| 232 // Indices into table | |
| 233 uint8x16_t indices = vld1q_u8(src); | |
| 234 | |
| 235 // Pixel output registers | |
| 236 uint8x16x4_t rgba; | |
| 237 rgba.val[0] = vdupq_n_u8(0); | |
| 238 rgba.val[1] = vdupq_n_u8(0); | |
| 239 rgba.val[2] = vdupq_n_u8(0); | |
| 240 rgba.val[3] = vdupq_n_u8(0); | |
| 241 | |
| 242 const uint32_t* table = ctable; | |
| 243 const int numColors = 256; | |
| 244 const int numColorsPerLoop = 32; | |
| 245 for (int j = 0; j < numColors / numColorsPerLoop; j++) { | |
| 246 // Load a separate color table for each of r, g, b, a | |
| 247 t0 = vld4q_u8((const uint8_t*) (table + 0)); // rgba | |
|
mtklein
2016/02/01 16:30:36
Where's our weak link? I don't suppose things get
| |
| 248 t1 = vld4q_u8((const uint8_t*) (table + 16)); // RGBA | |
| 249 SkTSwap(t0.val[1], t1.val[0]); // rRba, gGBA | |
| 250 SkTSwap(t0.val[3], t1.val[2]); // rRbB, gGaA | |
| 251 tr = *(((uint8x8x4_t*) &t0) + 0); // rR | |
| 252 tb = *(((uint8x8x4_t*) &t0) + 1); // bB | |
| 253 tg = *(((uint8x8x4_t*) &t1) + 0); // gG | |
| 254 ta = *(((uint8x8x4_t*) &t1) + 1); // aA | |
| 255 | |
| 256 // Use VTBL, then OR the results together. | |
| 257 rgba.val[0] = vorrq_u8(rgba.val[0], | |
| 258 vcombine_u8(vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 0)), | |
| 259 vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 1)))); | |
| 260 rgba.val[1] = vorrq_u8(rgba.val[1], | |
| 261 vcombine_u8(vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 0)), | |
| 262 vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 1)))); | |
| 263 rgba.val[2] = vorrq_u8(rgba.val[2], | |
| 264 vcombine_u8(vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 0)), | |
| 265 vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 1)))); | |
| 266 rgba.val[3] = vorrq_u8(rgba.val[3], | |
| 267 vcombine_u8(vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 0)), | |
| 268 vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 1)))); | |
| 269 | |
| 270 // Move the next set of indices into the range of the color table. Indices | |
| 271 // that are currently in range should go out of range due to underflow. | |
| 272 indices = vsubq_u8(indices, vdupq_n_u8(32)); | |
| 273 table += numColorsPerLoop; | |
| 274 } | |
| 275 | |
| 276 // Store output pixel values. | |
| 277 vst4q_u8((uint8_t*) dst, rgba); | |
| 278 | |
| 279 src += 16; | |
| 280 dst += 16; | |
| 281 dstWidth -= 16; | |
| 282 } | |
| 283 | |
| 284 for (int x = 0; x < dstWidth; x++) { | |
| 285 SkPMColor c = ctable[*src]; | |
| 286 dst[x] = c; | |
| 287 src += deltaSrc; | |
| 288 } | |
| 289 | |
| 290 | |
| 291 #else | |
| 224 src += offset; | 292 src += offset; |
| 225 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; | 293 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; |
| 226 for (int x = 0; x < dstWidth; x++) { | 294 for (int x = 0; x < dstWidth; x++) { |
| 227 SkPMColor c = ctable[*src]; | 295 SkPMColor c = ctable[*src]; |
| 228 dst[x] = c; | 296 dst[x] = c; |
| 229 src += deltaSrc; | 297 src += deltaSrc; |
| 230 } | 298 } |
| 299 #endif | |
| 231 } | 300 } |
| 232 | 301 |
| 233 static void swizzle_index_to_n32_skipZ( | 302 static void swizzle_index_to_n32_skipZ( |
| 234 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, | 303 void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, |
| 235 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { | 304 int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { |
| 236 | 305 |
| 237 src += offset; | 306 src += offset; |
| 238 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; | 307 SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; |
| 239 for (int x = 0; x < dstWidth; x++) { | 308 for (int x = 0; x < dstWidth; x++) { |
| 240 SkPMColor c = ctable[*src]; | 309 SkPMColor c = ctable[*src]; |
| (...skipping 573 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 814 } | 883 } |
| 815 | 884 |
| 816 return fAllocatedWidth; | 885 return fAllocatedWidth; |
| 817 } | 886 } |
| 818 | 887 |
| 819 void SkSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) { | 888 void SkSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) { |
| 820 SkASSERT(nullptr != dst && nullptr != src); | 889 SkASSERT(nullptr != dst && nullptr != src); |
| 821 fActualProc(SkTAddOffset<void>(dst, fDstOffsetBytes), src, fSwizzleWidth, fSrcBPP, | 890 fActualProc(SkTAddOffset<void>(dst, fDstOffsetBytes), src, fSwizzleWidth, fSrcBPP, |
| 822 fSampleX * fSrcBPP, fSrcOffsetUnits, fColorTable); | 891 fSampleX * fSrcBPP, fSrcOffsetUnits, fColorTable); |
| 823 } | 892 } |
| OLD | NEW |