Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkBlitRow_opts_arm_neon.h" | 8 #include "SkBlitRow_opts_arm_neon.h" |
| 9 | 9 |
| 10 #include "SkBlitMask.h" | 10 #include "SkBlitMask.h" |
| 11 #include "SkBlitRow.h" | 11 #include "SkBlitRow.h" |
| 12 #include "SkColorPriv.h" | 12 #include "SkColorPriv.h" |
| 13 #include "SkDither.h" | 13 #include "SkDither.h" |
| 14 #include "SkMathPriv.h" | 14 #include "SkMathPriv.h" |
| 15 #include "SkUtils.h" | 15 #include "SkUtils.h" |
| 16 | 16 |
| 17 #include "SkColor_opts_neon.h" | 17 #include "SkColor_opts_neon.h" |
| 18 #include <arm_neon.h> | 18 #include <arm_neon.h> |
| 19 | 19 |
| 20 #ifdef SK_CPU_ARM32 | |
| 21 void S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, | 20 void S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, |
| 22 const SkPMColor* SK_RESTRICT src, int count, | 21 const SkPMColor* SK_RESTRICT src, int count, |
| 23 U8CPU alpha, int /*x*/, int /*y*/) { | 22 U8CPU alpha, int /*x*/, int /*y*/) { |
| 24 SkASSERT(255 == alpha); | 23 SkASSERT(255 == alpha); |
| 25 | 24 |
| 26 while (count >= 8) { | 25 while (count >= 8) { |
| 27 uint8x8x4_t vsrc; | 26 uint8x8x4_t vsrc; |
| 28 uint16x8_t vdst; | 27 uint16x8_t vdst; |
| 29 | 28 |
| 30 // Load | 29 // Load |
| 30 #ifdef SK_CPU_ARM64 | |
| 31 uint8x8_t vsrc_0, vsrc_1, vsrc_2; | |
| 32 asm ( | |
| 33 "ld4 {v0.8b - v3.8b}, [%[src]] \t\n" | |
|
mtklein
2014/06/05 16:28:36
Looks like we've got the same load-32-bytes thing
kevin.petit
2014/06/05 17:00:02
There are actually a few more differences. Some us
| |
| 34 "mov %[vsrc0].8b, v0.8b \t\n" | |
| 35 "mov %[vsrc1].8b, v1.8b \t\n" | |
| 36 "mov %[vsrc2].8b, v2.8b \t\n" | |
| 37 : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), | |
| 38 [vsrc2] "=w" (vsrc_2) | |
| 39 : [src] "r" (src) | |
| 40 : "v0", "v1", "v2", "v3" | |
| 41 ); | |
| 42 vsrc.val[0] = vsrc_0; | |
| 43 vsrc.val[1] = vsrc_1; | |
| 44 vsrc.val[2] = vsrc_2; | |
| 45 | |
| 46 #else | |
| 31 vsrc = vld4_u8((uint8_t*)src); | 47 vsrc = vld4_u8((uint8_t*)src); |
| 48 #endif | |
| 32 | 49 |
| 33 // Convert src to 565 | 50 // Convert src to 565 |
| 34 vdst = SkPixel32ToPixel16_neon8(vsrc); | 51 vdst = SkPixel32ToPixel16_neon8(vsrc); |
| 35 | 52 |
| 36 // Store | 53 // Store |
| 37 vst1q_u16(dst, vdst); | 54 vst1q_u16(dst, vdst); |
| 38 | 55 |
| 39 // Prepare next iteration | 56 // Prepare next iteration |
| 40 dst += 8; | 57 dst += 8; |
| 41 src += 8; | 58 src += 8; |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 57 U8CPU alpha, int /*x*/, int /*y*/) { | 74 U8CPU alpha, int /*x*/, int /*y*/) { |
| 58 SkASSERT(255 > alpha); | 75 SkASSERT(255 > alpha); |
| 59 | 76 |
| 60 uint16x8_t vmask_blue, vscale; | 77 uint16x8_t vmask_blue, vscale; |
| 61 | 78 |
| 62 // prepare constants | 79 // prepare constants |
| 63 vscale = vdupq_n_u16(SkAlpha255To256(alpha)); | 80 vscale = vdupq_n_u16(SkAlpha255To256(alpha)); |
| 64 vmask_blue = vmovq_n_u16(0x1F); | 81 vmask_blue = vmovq_n_u16(0x1F); |
| 65 | 82 |
| 66 while (count >= 8) { | 83 while (count >= 8) { |
| 84 uint8x8x4_t vsrc; | |
| 67 uint16x8_t vdst, vdst_r, vdst_g, vdst_b; | 85 uint16x8_t vdst, vdst_r, vdst_g, vdst_b; |
| 68 uint16x8_t vres_r, vres_g, vres_b; | 86 uint16x8_t vres_r, vres_g, vres_b; |
| 69 uint8x8_t vsrc_r, vsrc_g, vsrc_b; | |
| 70 | 87 |
| 71 // Load src | 88 // Load src |
| 89 #ifdef SK_CPU_ARM64 | |
| 90 uint8x8_t vsrc_0, vsrc_1, vsrc_2; | |
| 91 asm ( | |
| 92 "ld4 {v0.8b - v3.8b}, [%[src]] \t\n" | |
| 93 "mov %[vsrc0].8b, v0.8b \t\n" | |
| 94 "mov %[vsrc1].8b, v1.8b \t\n" | |
| 95 "mov %[vsrc2].8b, v2.8b \t\n" | |
| 96 : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), | |
| 97 [vsrc2] "=w" (vsrc_2) | |
| 98 : [src] "r" (src) | |
| 99 : "v0", "v1", "v2", "v3" | |
| 100 ); | |
| 101 vsrc.val[0] = vsrc_0; | |
| 102 vsrc.val[1] = vsrc_1; | |
| 103 vsrc.val[2] = vsrc_2; | |
| 104 src += 8; | |
| 105 #else | |
| 72 { | 106 { |
| 73 register uint8x8_t d0 asm("d0"); | 107 register uint8x8_t d0 asm("d0"); |
| 74 register uint8x8_t d1 asm("d1"); | 108 register uint8x8_t d1 asm("d1"); |
| 75 register uint8x8_t d2 asm("d2"); | 109 register uint8x8_t d2 asm("d2"); |
| 76 register uint8x8_t d3 asm("d3"); | 110 register uint8x8_t d3 asm("d3"); |
| 77 | 111 |
| 78 asm ( | 112 asm ( |
| 79 "vld4.8 {d0-d3},[%[src]]!" | 113 "vld4.8 {d0-d3},[%[src]]!" |
| 80 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) | 114 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) |
| 81 : | 115 : |
| 82 ); | 116 ); |
| 83 vsrc_g = d1; | 117 vsrc.val[0] = d0; |
| 84 #if SK_PMCOLOR_BYTE_ORDER(B,G,R,A) | 118 vsrc.val[1] = d1; |
| 85 vsrc_r = d2; vsrc_b = d0; | 119 vsrc.val[2] = d2; |
| 86 #elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A) | 120 } |
| 87 vsrc_r = d0; vsrc_b = d2; | |
| 88 #endif | 121 #endif |
| 89 } | |
| 90 | 122 |
| 91 // Load and unpack dst | 123 // Load and unpack dst |
| 92 vdst = vld1q_u16(dst); | 124 vdst = vld1q_u16(dst); |
| 93 vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes | 125 vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes |
| 94 vdst_b = vandq_u16(vdst, vmask_blue); // extract blue | 126 vdst_b = vandq_u16(vdst, vmask_blue); // extract blue |
| 95 vdst_r = vshrq_n_u16(vdst, 6+5); // extract red | 127 vdst_r = vshrq_n_u16(vdst, 6+5); // extract red |
| 96 vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green | 128 vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green |
| 97 | 129 |
| 98 // Shift src to 565 | 130 // Shift src to 565 range |
| 99 vsrc_r = vshr_n_u8(vsrc_r, 3); // shift red to 565 range | 131 vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 3); |
| 100 vsrc_g = vshr_n_u8(vsrc_g, 2); // shift green to 565 range | 132 vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 2); |
| 101 vsrc_b = vshr_n_u8(vsrc_b, 3); // shift blue to 565 range | 133 vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 3); |
| 102 | 134 |
| 103 // Scale src - dst | 135 // Scale src - dst |
| 104 vres_r = vmovl_u8(vsrc_r) - vdst_r; | 136 vres_r = vmovl_u8(vsrc.val[NEON_R]) - vdst_r; |
| 105 vres_g = vmovl_u8(vsrc_g) - vdst_g; | 137 vres_g = vmovl_u8(vsrc.val[NEON_G]) - vdst_g; |
| 106 vres_b = vmovl_u8(vsrc_b) - vdst_b; | 138 vres_b = vmovl_u8(vsrc.val[NEON_B]) - vdst_b; |
| 107 | 139 |
| 108 vres_r = vshrq_n_u16(vres_r * vscale, 8); | 140 vres_r = vshrq_n_u16(vres_r * vscale, 8); |
| 109 vres_g = vshrq_n_u16(vres_g * vscale, 8); | 141 vres_g = vshrq_n_u16(vres_g * vscale, 8); |
| 110 vres_b = vshrq_n_u16(vres_b * vscale, 8); | 142 vres_b = vshrq_n_u16(vres_b * vscale, 8); |
| 111 | 143 |
| 112 vres_r += vdst_r; | 144 vres_r += vdst_r; |
| 113 vres_g += vdst_g; | 145 vres_g += vdst_g; |
| 114 vres_b += vdst_b; | 146 vres_b += vdst_b; |
| 115 | 147 |
| 116 // Combine | 148 // Combine |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 129 SkPMColorAssert(c); | 161 SkPMColorAssert(c); |
| 130 uint16_t d = *dst; | 162 uint16_t d = *dst; |
| 131 *dst++ = SkPackRGB16( | 163 *dst++ = SkPackRGB16( |
| 132 SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale), | 164 SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale), |
| 133 SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale), | 165 SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale), |
| 134 SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale)); | 166 SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale)); |
| 135 } while (--count != 0); | 167 } while (--count != 0); |
| 136 } | 168 } |
| 137 } | 169 } |
| 138 | 170 |
| 171 #ifdef SK_CPU_ARM32 | |
| 139 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, | 172 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, |
| 140 const SkPMColor* SK_RESTRICT src, int count, | 173 const SkPMColor* SK_RESTRICT src, int count, |
| 141 U8CPU alpha, int /*x*/, int /*y*/) { | 174 U8CPU alpha, int /*x*/, int /*y*/) { |
| 142 SkASSERT(255 == alpha); | 175 SkASSERT(255 == alpha); |
| 143 | 176 |
| 144 if (count >= 8) { | 177 if (count >= 8) { |
| 145 uint16_t* SK_RESTRICT keep_dst = 0; | 178 uint16_t* SK_RESTRICT keep_dst = 0; |
| 146 | 179 |
| 147 asm volatile ( | 180 asm volatile ( |
| 148 "ands ip, %[count], #7 \n\t" | 181 "ands ip, %[count], #7 \n\t" |
| (...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 306 | 339 |
| 307 "21: \n\t" | 340 "21: \n\t" |
| 308 : [count] "+r" (count) | 341 : [count] "+r" (count) |
| 309 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (s rc) | 342 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (s rc) |
| 310 : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6" ,"d7", | 343 : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6" ,"d7", |
| 311 "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25 ","d26","d27","d28","d29", | 344 "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25 ","d26","d27","d28","d29", |
| 312 "d30","d31" | 345 "d30","d31" |
| 313 ); | 346 ); |
| 314 } | 347 } |
| 315 } | 348 } |
| 349 #endif | |
| 316 | 350 |
| 317 static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { | 351 static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { |
| 318 prod += vdupq_n_u16(128); | 352 prod += vdupq_n_u16(128); |
| 319 prod += vshrq_n_u16(prod, 8); | 353 prod += vshrq_n_u16(prod, 8); |
| 320 return vshrq_n_u16(prod, 8); | 354 return vshrq_n_u16(prod, 8); |
| 321 } | 355 } |
| 322 | 356 |
| 323 void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, | 357 void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, |
| 324 const SkPMColor* SK_RESTRICT src, int count, | 358 const SkPMColor* SK_RESTRICT src, int count, |
| 325 U8CPU alpha, int /*x*/, int /*y*/) { | 359 U8CPU alpha, int /*x*/, int /*y*/) { |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 339 valpha = vdup_n_u8(alpha); | 373 valpha = vdup_n_u8(alpha); |
| 340 vmask_blue = vmovq_n_u16(SK_B16_MASK); | 374 vmask_blue = vmovq_n_u16(SK_B16_MASK); |
| 341 | 375 |
| 342 do { | 376 do { |
| 343 uint16x8_t vdst, vdst_r, vdst_g, vdst_b; | 377 uint16x8_t vdst, vdst_r, vdst_g, vdst_b; |
| 344 uint16x8_t vres_a, vres_r, vres_g, vres_b; | 378 uint16x8_t vres_a, vres_r, vres_g, vres_b; |
| 345 uint8x8x4_t vsrc; | 379 uint8x8x4_t vsrc; |
| 346 | 380 |
| 347 // load pixels | 381 // load pixels |
| 348 vdst = vld1q_u16(dst); | 382 vdst = vld1q_u16(dst); |
| 383 #ifdef SK_CPU_ARM64 | |
| 384 uint8x8_t vsrc_0, vsrc_1, vsrc_2, vsrc_3; | |
| 385 asm ( | |
| 386 "ld4 {v0.8b - v3.8b}, [%[src]] \t\n" | |
| 387 "mov %[vsrc0].8b, v0.8b \t\n" | |
| 388 "mov %[vsrc1].8b, v1.8b \t\n" | |
| 389 "mov %[vsrc2].8b, v2.8b \t\n" | |
| 390 "mov %[vsrc3].8b, v3.8b \t\n" | |
| 391 : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), | |
| 392 [vsrc2] "=w" (vsrc_2), [vsrc3] "=w" (vsrc_3) | |
| 393 : [src] "r" (src) | |
| 394 : "v0", "v1", "v2", "v3" | |
| 395 ); | |
| 396 src += 8; | |
| 397 vsrc.val[0] = vsrc_0; | |
| 398 vsrc.val[1] = vsrc_1; | |
| 399 vsrc.val[2] = vsrc_2; | |
| 400 vsrc.val[3] = vsrc_3; | |
| 401 #else | |
| 349 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) | 402 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) |
| 350 asm ( | 403 asm ( |
| 351 "vld4.u8 %h[vsrc], [%[src]]!" | 404 "vld4.u8 %h[vsrc], [%[src]]!" |
| 352 : [vsrc] "=w" (vsrc), [src] "+&r" (src) | 405 : [vsrc] "=w" (vsrc), [src] "+&r" (src) |
| 353 : : | 406 : : |
| 354 ); | 407 ); |
| 355 #else | 408 #else |
| 356 register uint8x8_t d0 asm("d0"); | 409 register uint8x8_t d0 asm("d0"); |
| 357 register uint8x8_t d1 asm("d1"); | 410 register uint8x8_t d1 asm("d1"); |
| 358 register uint8x8_t d2 asm("d2"); | 411 register uint8x8_t d2 asm("d2"); |
| 359 register uint8x8_t d3 asm("d3"); | 412 register uint8x8_t d3 asm("d3"); |
| 360 | 413 |
| 361 asm volatile ( | 414 asm volatile ( |
| 362 "vld4.u8 {d0-d3},[%[src]]!;" | 415 "vld4.u8 {d0-d3},[%[src]]!;" |
| 363 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), | 416 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), |
| 364 [src] "+&r" (src) | 417 [src] "+&r" (src) |
| 365 : : | 418 : : |
| 366 ); | 419 ); |
| 367 vsrc.val[0] = d0; | 420 vsrc.val[0] = d0; |
| 368 vsrc.val[1] = d1; | 421 vsrc.val[1] = d1; |
| 369 vsrc.val[2] = d2; | 422 vsrc.val[2] = d2; |
| 370 vsrc.val[3] = d3; | 423 vsrc.val[3] = d3; |
| 371 #endif | 424 #endif |
| 425 #endif // #ifdef SK_CPU_ARM64 | |
| 372 | 426 |
| 373 | 427 |
| 374 // deinterleave dst | 428 // deinterleave dst |
| 375 vdst_g = vshlq_n_u16(vdst, SK_R16_BITS); // shift green to to p of lanes | 429 vdst_g = vshlq_n_u16(vdst, SK_R16_BITS); // shift green to to p of lanes |
| 376 vdst_b = vdst & vmask_blue; // extract blue | 430 vdst_b = vdst & vmask_blue; // extract blue |
| 377 vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red | 431 vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red |
| 378 vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green | 432 vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green |
| 379 | 433 |
| 380 // shift src to 565 | 434 // shift src to 565 |
| 381 vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS); | 435 vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS); |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 461 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; | 515 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; |
| 462 | 516 |
| 463 uint8x8_t vdither = vld1_u8(dstart); // load dither values | 517 uint8x8_t vdither = vld1_u8(dstart); // load dither values |
| 464 uint8x8_t vdither_g = vshr_n_u8(vdither, 1); // calc. green dither value s | 518 uint8x8_t vdither_g = vshr_n_u8(vdither, 1); // calc. green dither value s |
| 465 | 519 |
| 466 int16x8_t vscale = vdupq_n_s16(scale); // duplicate scale into ne on reg | 520 int16x8_t vscale = vdupq_n_s16(scale); // duplicate scale into ne on reg |
| 467 uint16x8_t vmask_b = vdupq_n_u16(0x1F); // set up blue mask | 521 uint16x8_t vmask_b = vdupq_n_u16(0x1F); // set up blue mask |
| 468 | 522 |
| 469 do { | 523 do { |
| 470 | 524 |
| 525 uint8x8x4_t vsrc; | |
| 471 uint8x8_t vsrc_r, vsrc_g, vsrc_b; | 526 uint8x8_t vsrc_r, vsrc_g, vsrc_b; |
| 472 uint8x8_t vsrc565_r, vsrc565_g, vsrc565_b; | 527 uint8x8_t vsrc565_r, vsrc565_g, vsrc565_b; |
| 473 uint16x8_t vsrc_dit_r, vsrc_dit_g, vsrc_dit_b; | 528 uint16x8_t vsrc_dit_r, vsrc_dit_g, vsrc_dit_b; |
| 474 uint16x8_t vsrc_res_r, vsrc_res_g, vsrc_res_b; | 529 uint16x8_t vsrc_res_r, vsrc_res_g, vsrc_res_b; |
| 475 uint16x8_t vdst; | 530 uint16x8_t vdst; |
| 476 uint16x8_t vdst_r, vdst_g, vdst_b; | 531 uint16x8_t vdst_r, vdst_g, vdst_b; |
| 477 int16x8_t vres_r, vres_g, vres_b; | 532 int16x8_t vres_r, vres_g, vres_b; |
| 478 int8x8_t vres8_r, vres8_g, vres8_b; | 533 int8x8_t vres8_r, vres8_g, vres8_b; |
| 479 | 534 |
| 480 // Load source and add dither | 535 // Load source and add dither |
| 536 #ifdef SK_CPU_ARM64 | |
| 537 uint8x8_t vsrc_0, vsrc_1, vsrc_2; | |
| 538 asm ( | |
| 539 "ld4 {v0.8b - v3.8b}, [%[src]] \t\n" | |
| 540 "mov %[vsrc0].8b, v0.8b \t\n" | |
| 541 "mov %[vsrc1].8b, v1.8b \t\n" | |
| 542 "mov %[vsrc2].8b, v2.8b \t\n" | |
| 543 : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), | |
| 544 [vsrc2] "=w" (vsrc_2) | |
| 545 : [src] "r" (src) | |
| 546 : "v0", "v1", "v2", "v3" | |
| 547 ); | |
| 548 | |
| 549 src += 8; | |
| 550 | |
| 551 vsrc.val[0] = vsrc_0; | |
| 552 vsrc.val[1] = vsrc_1; | |
| 553 vsrc.val[2] = vsrc_2; | |
| 554 #else | |
| 481 { | 555 { |
| 482 register uint8x8_t d0 asm("d0"); | 556 register uint8x8_t d0 asm("d0"); |
| 483 register uint8x8_t d1 asm("d1"); | 557 register uint8x8_t d1 asm("d1"); |
| 484 register uint8x8_t d2 asm("d2"); | 558 register uint8x8_t d2 asm("d2"); |
| 485 register uint8x8_t d3 asm("d3"); | 559 register uint8x8_t d3 asm("d3"); |
| 486 | 560 |
| 487 asm ( | 561 asm ( |
| 488 "vld4.8 {d0-d3},[%[src]]! /* r=%P0 g=%P1 b=%P2 a=%P3 */" | 562 "vld4.8 {d0-d3},[%[src]]! /* r=%P0 g=%P1 b=%P2 a=%P3 */" |
| 489 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) | 563 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) |
| 490 : | 564 : |
| 491 ); | 565 ); |
| 492 vsrc_g = d1; | 566 vsrc.val[0] = d0; |
| 493 #if SK_PMCOLOR_BYTE_ORDER(B,G,R,A) | 567 vsrc.val[1] = d1; |
| 494 vsrc_r = d2; vsrc_b = d0; | 568 vsrc.val[2] = d2; |
| 495 #elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A) | 569 } |
| 496 vsrc_r = d0; vsrc_b = d2; | |
| 497 #endif | 570 #endif |
| 498 } | 571 vsrc_r = vsrc.val[NEON_R]; |
| 572 vsrc_g = vsrc.val[NEON_G]; | |
| 573 vsrc_b = vsrc.val[NEON_B]; | |
| 499 | 574 |
| 500 vsrc565_g = vshr_n_u8(vsrc_g, 6); // calc. green >> 6 | 575 vsrc565_g = vshr_n_u8(vsrc_g, 6); // calc. green >> 6 |
| 501 vsrc565_r = vshr_n_u8(vsrc_r, 5); // calc. red >> 5 | 576 vsrc565_r = vshr_n_u8(vsrc_r, 5); // calc. red >> 5 |
| 502 vsrc565_b = vshr_n_u8(vsrc_b, 5); // calc. blue >> 5 | 577 vsrc565_b = vshr_n_u8(vsrc_b, 5); // calc. blue >> 5 |
| 503 | 578 |
| 504 vsrc_dit_g = vaddl_u8(vsrc_g, vdither_g); // add in dither to green and widen | 579 vsrc_dit_g = vaddl_u8(vsrc_g, vdither_g); // add in dither to green and widen |
| 505 vsrc_dit_r = vaddl_u8(vsrc_r, vdither); // add in dither to red an d widen | 580 vsrc_dit_r = vaddl_u8(vsrc_r, vdither); // add in dither to red an d widen |
| 506 vsrc_dit_b = vaddl_u8(vsrc_b, vdither); // add in dither to blue a nd widen | 581 vsrc_dit_b = vaddl_u8(vsrc_b, vdither); // add in dither to blue a nd widen |
| 507 | 582 |
| 508 vsrc_dit_r = vsubw_u8(vsrc_dit_r, vsrc565_r); // sub shifted red fr om result | 583 vsrc_dit_r = vsubw_u8(vsrc_dit_r, vsrc565_r); // sub shifted red fr om result |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 569 sb = SkDITHER_B32To565(sb, dither); | 644 sb = SkDITHER_B32To565(sb, dither); |
| 570 | 645 |
| 571 uint16_t d = *dst; | 646 uint16_t d = *dst; |
| 572 *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale), | 647 *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale), |
| 573 SkAlphaBlend(sg, SkGetPackedG16(d), scale), | 648 SkAlphaBlend(sg, SkGetPackedG16(d), scale), |
| 574 SkAlphaBlend(sb, SkGetPackedB16(d), scale)); | 649 SkAlphaBlend(sb, SkGetPackedB16(d), scale)); |
| 575 DITHER_INC_X(x); | 650 DITHER_INC_X(x); |
| 576 } while (--count != 0); | 651 } while (--count != 0); |
| 577 } | 652 } |
| 578 } | 653 } |
| 579 #endif | |
| 580 | 654 |
| 581 void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, | 655 void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, |
| 582 const SkPMColor* SK_RESTRICT src, | 656 const SkPMColor* SK_RESTRICT src, |
| 583 int count, U8CPU alpha) { | 657 int count, U8CPU alpha) { |
| 584 | 658 |
| 585 SkASSERT(255 == alpha); | 659 SkASSERT(255 == alpha); |
| 586 if (count > 0) { | 660 if (count > 0) { |
| 587 | 661 |
| 588 | 662 |
| 589 uint8x8_t alpha_mask; | 663 uint8x8_t alpha_mask; |
| (...skipping 450 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1040 uint16_t *pc = (uint16_t*) p; | 1114 uint16_t *pc = (uint16_t*) p; |
| 1041 sprintf(buf,"%8s:", str); | 1115 sprintf(buf,"%8s:", str); |
| 1042 len = (len / sizeof(uint16_t)); /* passed as bytes */ | 1116 len = (len / sizeof(uint16_t)); /* passed as bytes */ |
| 1043 for(i=0;i<len;i++) { | 1117 for(i=0;i<len;i++) { |
| 1044 sprintf(tbuf, " %04x", pc[i]); | 1118 sprintf(tbuf, " %04x", pc[i]); |
| 1045 strcat(buf, tbuf); | 1119 strcat(buf, tbuf); |
| 1046 } | 1120 } |
| 1047 SkDebugf("%s\n", buf); | 1121 SkDebugf("%s\n", buf); |
| 1048 } | 1122 } |
| 1049 #endif | 1123 #endif |
| 1124 #endif // #ifdef SK_CPU_ARM32 | |
| 1050 | 1125 |
| 1051 void S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst, | 1126 void S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst, |
| 1052 const SkPMColor* SK_RESTRICT src, | 1127 const SkPMColor* SK_RESTRICT src, |
| 1053 int count, U8CPU alpha, int x, int y) { | 1128 int count, U8CPU alpha, int x, int y) { |
| 1054 SkASSERT(255 == alpha); | 1129 SkASSERT(255 == alpha); |
| 1055 | 1130 |
| 1056 #define UNROLL 8 | 1131 #define UNROLL 8 |
| 1057 | 1132 |
| 1058 if (count >= UNROLL) { | 1133 if (count >= UNROLL) { |
| 1059 | 1134 |
| 1060 #if defined(DEBUG_OPAQUE_DITHER) | 1135 #if defined(DEBUG_OPAQUE_DITHER) |
| 1061 uint16_t tmpbuf[UNROLL]; | 1136 uint16_t tmpbuf[UNROLL]; |
| 1062 int td[UNROLL]; | 1137 int td[UNROLL]; |
| 1063 int tdv[UNROLL]; | 1138 int tdv[UNROLL]; |
| 1064 int ta[UNROLL]; | 1139 int ta[UNROLL]; |
| 1065 int tap[UNROLL]; | 1140 int tap[UNROLL]; |
| 1066 uint16_t in_dst[UNROLL]; | 1141 uint16_t in_dst[UNROLL]; |
| 1067 int offset = 0; | 1142 int offset = 0; |
| 1068 int noisy = 0; | 1143 int noisy = 0; |
| 1069 #endif | 1144 #endif |
| 1070 | 1145 |
| 1071 uint8x8_t dbase; | 1146 uint8x8_t dbase; |
| 1072 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; | 1147 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; |
| 1073 dbase = vld1_u8(dstart); | 1148 dbase = vld1_u8(dstart); |
| 1074 | 1149 |
| 1075 do { | 1150 do { |
| 1151 uint8x8x4_t vsrc; | |
| 1076 uint8x8_t sr, sg, sb, sa, d; | 1152 uint8x8_t sr, sg, sb, sa, d; |
| 1077 uint16x8_t dst8, scale8, alpha8; | 1153 uint16x8_t dst8, scale8, alpha8; |
| 1078 uint16x8_t dst_r, dst_g, dst_b; | 1154 uint16x8_t dst_r, dst_g, dst_b; |
| 1079 | 1155 |
| 1080 #if defined(DEBUG_OPAQUE_DITHER) | 1156 #if defined(DEBUG_OPAQUE_DITHER) |
| 1081 // calculate 8 elements worth into a temp buffer | 1157 // calculate 8 elements worth into a temp buffer |
| 1082 { | 1158 { |
| 1083 int my_y = y; | 1159 int my_y = y; |
| 1084 int my_x = x; | 1160 int my_x = x; |
| 1085 SkPMColor* my_src = (SkPMColor*)src; | 1161 SkPMColor* my_src = (SkPMColor*)src; |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 1116 tmpbuf[i] = *my_dst; | 1192 tmpbuf[i] = *my_dst; |
| 1117 ta[i] = tdv[i] = td[i] = 0xbeef; | 1193 ta[i] = tdv[i] = td[i] = 0xbeef; |
| 1118 } | 1194 } |
| 1119 in_dst[i] = *my_dst; | 1195 in_dst[i] = *my_dst; |
| 1120 my_dst += 1; | 1196 my_dst += 1; |
| 1121 DITHER_INC_X(my_x); | 1197 DITHER_INC_X(my_x); |
| 1122 } | 1198 } |
| 1123 } | 1199 } |
| 1124 #endif | 1200 #endif |
| 1125 | 1201 |
| 1202 #ifdef SK_CPU_ARM64 | |
| 1203 uint8x8_t vsrc_0, vsrc_1, vsrc_2, vsrc_3; | |
| 1204 asm ( | |
| 1205 "ld4 {v0.8b - v3.8b}, [%[src]] \t\n" | |
| 1206 "mov %[vsrc0].8b, v0.8b \t\n" | |
| 1207 "mov %[vsrc1].8b, v1.8b \t\n" | |
| 1208 "mov %[vsrc2].8b, v2.8b \t\n" | |
| 1209 "mov %[vsrc3].8b, v3.8b \t\n" | |
| 1210 : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), | |
| 1211 [vsrc2] "=w" (vsrc_2), [vsrc3] "=w" (vsrc_3) | |
| 1212 : [src] "r" (src) | |
| 1213 : "v0", "v1", "v2", "v3" | |
| 1214 ); | |
| 1126 | 1215 |
| 1216 src += 8; | |
| 1217 vsrc.val[0] = vsrc_0; | |
| 1218 vsrc.val[1] = vsrc_1; | |
| 1219 vsrc.val[2] = vsrc_2; | |
| 1220 vsrc.val[3] = vsrc_3; | |
| 1221 #else | |
| 1127 { | 1222 { |
| 1128 register uint8x8_t d0 asm("d0"); | 1223 register uint8x8_t d0 asm("d0"); |
| 1129 register uint8x8_t d1 asm("d1"); | 1224 register uint8x8_t d1 asm("d1"); |
| 1130 register uint8x8_t d2 asm("d2"); | 1225 register uint8x8_t d2 asm("d2"); |
| 1131 register uint8x8_t d3 asm("d3"); | 1226 register uint8x8_t d3 asm("d3"); |
| 1132 | 1227 |
| 1133 asm ("vld4.8 {d0-d3},[%[src]]! /* r=%P0 g=%P1 b=%P2 a=%P3 */" | 1228 asm ("vld4.8 {d0-d3},[%[src]]! " |
| 1134 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+r" (src) | 1229 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+r" (src) |
| 1135 : | 1230 : |
| 1136 ); | 1231 ); |
| 1137 #if SK_PMCOLOR_BYTE_ORDER(B,G,R,A) | 1232 vsrc.val[0] = d0; |
| 1138 sr = d2; sg = d1; sb = d0; sa = d3; | 1233 vsrc.val[1] = d1; |
| 1139 #elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A) | 1234 vsrc.val[2] = d2; |
| 1140 sr = d0; sg = d1; sb = d2; sa = d3; | 1235 vsrc.val[3] = d3; |
| 1236 } | |
| 1141 #endif | 1237 #endif |
| 1142 } | 1238 sa = vsrc.val[NEON_A]; |
| 1239 sr = vsrc.val[NEON_R]; | |
| 1240 sg = vsrc.val[NEON_G]; | |
| 1241 sb = vsrc.val[NEON_B]; | |
| 1143 | 1242 |
| 1144 /* calculate 'd', which will be 0..7 | 1243 /* calculate 'd', which will be 0..7 |
| 1145 * dbase[] is 0..7; alpha is 0..256; 16 bits suffice | 1244 * dbase[] is 0..7; alpha is 0..256; 16 bits suffice |
| 1146 */ | 1245 */ |
| 1147 alpha8 = vmovl_u8(dbase); | 1246 alpha8 = vmovl_u8(dbase); |
| 1148 alpha8 = vmlal_u8(alpha8, sa, dbase); | 1247 alpha8 = vmlal_u8(alpha8, sa, dbase); |
| 1149 d = vshrn_n_u16(alpha8, 8); // narrowing too | 1248 d = vshrn_n_u16(alpha8, 8); // narrowing too |
| 1150 | 1249 |
| 1151 // sr = sr - (sr>>5) + d | 1250 // sr = sr - (sr>>5) + d |
| 1152 /* watching for 8-bit overflow. d is 0..7; risky range of | 1251 /* watching for 8-bit overflow. d is 0..7; risky range of |
| (...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1276 #define UNROLL 8 | 1375 #define UNROLL 8 |
| 1277 if (count >= UNROLL) { | 1376 if (count >= UNROLL) { |
| 1278 uint8x8_t d; | 1377 uint8x8_t d; |
| 1279 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; | 1378 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; |
| 1280 d = vld1_u8(dstart); | 1379 d = vld1_u8(dstart); |
| 1281 | 1380 |
| 1282 while (count >= UNROLL) { | 1381 while (count >= UNROLL) { |
| 1283 uint8x8_t sr, sg, sb; | 1382 uint8x8_t sr, sg, sb; |
| 1284 uint16x8_t dr, dg, db; | 1383 uint16x8_t dr, dg, db; |
| 1285 uint16x8_t dst8; | 1384 uint16x8_t dst8; |
| 1385 uint8x8x4_t vsrc; | |
| 1286 | 1386 |
| 1387 #ifdef SK_CPU_ARM64 | |
| 1388 uint8x8_t vsrc_0, vsrc_1, vsrc_2; | |
| 1389 asm ( | |
| 1390 "ld4 {v0.8b - v3.8b}, [%[src]] \t\n" | |
| 1391 "mov %[vsrc0].8b, v0.8b \t\n" | |
| 1392 "mov %[vsrc1].8b, v1.8b \t\n" | |
| 1393 "mov %[vsrc2].8b, v2.8b \t\n" | |
| 1394 : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), | |
| 1395 [vsrc2] "=w" (vsrc_2) | |
| 1396 : [src] "r" (src) | |
| 1397 : "v0", "v1", "v2", "v3" | |
| 1398 ); | |
| 1399 src += 8; | |
| 1400 | |
| 1401 vsrc.val[0] = vsrc_0; | |
| 1402 vsrc.val[1] = vsrc_1; | |
| 1403 vsrc.val[2] = vsrc_2; | |
| 1404 #else | |
| 1287 { | 1405 { |
| 1288 register uint8x8_t d0 asm("d0"); | 1406 register uint8x8_t d0 asm("d0"); |
| 1289 register uint8x8_t d1 asm("d1"); | 1407 register uint8x8_t d1 asm("d1"); |
| 1290 register uint8x8_t d2 asm("d2"); | 1408 register uint8x8_t d2 asm("d2"); |
| 1291 register uint8x8_t d3 asm("d3"); | 1409 register uint8x8_t d3 asm("d3"); |
| 1292 | 1410 |
| 1293 asm ( | 1411 asm ( |
| 1294 "vld4.8 {d0-d3},[%[src]]! /* r=%P0 g=%P1 b=%P2 a=%P3 */" | 1412 "vld4.8 {d0-d3},[%[src]]! " |
| 1295 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) | 1413 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) |
| 1296 : | 1414 : |
| 1297 ); | 1415 ); |
| 1298 sg = d1; | 1416 vsrc.val[0] = d0; |
| 1299 #if SK_PMCOLOR_BYTE_ORDER(B,G,R,A) | 1417 vsrc.val[1] = d1; |
| 1300 sr = d2; sb = d0; | 1418 vsrc.val[2] = d2; |
| 1301 #elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A) | 1419 } |
| 1302 sr = d0; sb = d2; | |
| 1303 #endif | 1420 #endif |
| 1304 } | 1421 sr = vsrc.val[NEON_R]; |
| 1422 sg = vsrc.val[NEON_G]; | |
| 1423 sb = vsrc.val[NEON_B]; | |
| 1424 | |
| 1305 /* XXX: if we want to prefetch, hide it in the above asm() | 1425 /* XXX: if we want to prefetch, hide it in the above asm() |
| 1306 * using the gcc __builtin_prefetch(), the prefetch will | 1426 * using the gcc __builtin_prefetch(), the prefetch will |
| 1307 * fall to the bottom of the loop -- it won't stick up | 1427 * fall to the bottom of the loop -- it won't stick up |
| 1308 * at the top of the loop, just after the vld4. | 1428 * at the top of the loop, just after the vld4. |
| 1309 */ | 1429 */ |
| 1310 | 1430 |
| 1311 // sr = sr - (sr>>5) + d | 1431 // sr = sr - (sr>>5) + d |
| 1312 sr = vsub_u8(sr, vshr_n_u8(sr, 5)); | 1432 sr = vsub_u8(sr, vshr_n_u8(sr, 5)); |
| 1313 dr = vaddl_u8(sr, d); | 1433 dr = vaddl_u8(sr, d); |
| 1314 | 1434 |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1362 SkPMColor c = *src++; | 1482 SkPMColor c = *src++; |
| 1363 SkPMColorAssert(c); | 1483 SkPMColorAssert(c); |
| 1364 SkASSERT(SkGetPackedA32(c) == 255); | 1484 SkASSERT(SkGetPackedA32(c) == 255); |
| 1365 | 1485 |
| 1366 unsigned dither = DITHER_VALUE(x); | 1486 unsigned dither = DITHER_VALUE(x); |
| 1367 *dst++ = SkDitherRGB32To565(c, dither); | 1487 *dst++ = SkDitherRGB32To565(c, dither); |
| 1368 DITHER_INC_X(x); | 1488 DITHER_INC_X(x); |
| 1369 } while (--count != 0); | 1489 } while (--count != 0); |
| 1370 } | 1490 } |
| 1371 } | 1491 } |
| 1372 #endif | |
| 1373 | 1492 |
| 1374 void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, | 1493 void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, |
| 1375 SkPMColor color) { | 1494 SkPMColor color) { |
| 1376 if (count <= 0) { | 1495 if (count <= 0) { |
| 1377 return; | 1496 return; |
| 1378 } | 1497 } |
| 1379 | 1498 |
| 1380 if (0 == color) { | 1499 if (0 == color) { |
| 1381 if (src != dst) { | 1500 if (src != dst) { |
| 1382 memcpy(dst, src, count * sizeof(SkPMColor)); | 1501 memcpy(dst, src, count * sizeof(SkPMColor)); |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1468 *dst = color + SkAlphaMulQ(*src, scale); | 1587 *dst = color + SkAlphaMulQ(*src, scale); |
| 1469 src += 1; | 1588 src += 1; |
| 1470 dst += 1; | 1589 dst += 1; |
| 1471 count--; | 1590 count--; |
| 1472 } | 1591 } |
| 1473 } | 1592 } |
| 1474 | 1593 |
| 1475 /////////////////////////////////////////////////////////////////////////////// | 1594 /////////////////////////////////////////////////////////////////////////////// |
| 1476 | 1595 |
| 1477 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { | 1596 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { |
| 1478 #ifdef SK_CPU_ARM32 | |
| 1479 // no dither | 1597 // no dither |
| 1480 S32_D565_Opaque_neon, | 1598 S32_D565_Opaque_neon, |
| 1481 S32_D565_Blend_neon, | 1599 S32_D565_Blend_neon, |
| 1600 #ifdef SK_CPU_ARM32 | |
| 1482 S32A_D565_Opaque_neon, | 1601 S32A_D565_Opaque_neon, |
| 1602 #else | |
| 1603 NULL, | |
| 1604 #endif | |
| 1483 S32A_D565_Blend_neon, | 1605 S32A_D565_Blend_neon, |
| 1484 | 1606 |
| 1485 // dither | 1607 // dither |
| 1486 S32_D565_Opaque_Dither_neon, | 1608 S32_D565_Opaque_Dither_neon, |
| 1487 S32_D565_Blend_Dither_neon, | 1609 S32_D565_Blend_Dither_neon, |
| 1488 S32A_D565_Opaque_Dither_neon, | 1610 S32A_D565_Opaque_Dither_neon, |
| 1489 NULL, // S32A_D565_Blend_Dither | 1611 NULL, // S32A_D565_Blend_Dither |
| 1490 #else | |
| 1491 NULL, NULL, NULL, NULL, | |
| 1492 NULL, NULL, NULL, NULL | |
| 1493 #endif | |
| 1494 }; | 1612 }; |
| 1495 | 1613 |
| 1496 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { | 1614 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { |
| 1497 NULL, // S32_Opaque, | 1615 NULL, // S32_Opaque, |
| 1498 S32_Blend_BlitRow32_neon, // S32_Blend, | 1616 S32_Blend_BlitRow32_neon, // S32_Blend, |
| 1499 /* | 1617 /* |
| 1500 * We have two choices for S32A_Opaque procs. The one reads the src alpha | 1618 * We have two choices for S32A_Opaque procs. The one reads the src alpha |
| 1501 * value and attempts to optimize accordingly. The optimization is | 1619 * value and attempts to optimize accordingly. The optimization is |
| 1502 * sensitive to the source content and is not a win in all cases. For | 1620 * sensitive to the source content and is not a win in all cases. For |
| 1503 * example, if there are a lot of transitions between the alpha states, | 1621 * example, if there are a lot of transitions between the alpha states, |
| 1504 * the performance will almost certainly be worse. However, for many | 1622 * the performance will almost certainly be worse. However, for many |
| 1505 * common cases the performance is equivalent or better than the standard | 1623 * common cases the performance is equivalent or better than the standard |
| 1506 * case where we do not inspect the src alpha. | 1624 * case where we do not inspect the src alpha. |
| 1507 */ | 1625 */ |
| 1508 #if SK_A32_SHIFT == 24 | 1626 #if SK_A32_SHIFT == 24 |
| 1509 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor | 1627 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor |
| 1510 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, | 1628 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, |
| 1511 #else | 1629 #else |
| 1512 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, | 1630 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, |
| 1513 #endif | 1631 #endif |
| 1514 #ifdef SK_CPU_ARM32 | 1632 #ifdef SK_CPU_ARM32 |
| 1515 S32A_Blend_BlitRow32_neon // S32A_Blend | 1633 S32A_Blend_BlitRow32_neon // S32A_Blend |
| 1516 #else | 1634 #else |
| 1517 NULL | 1635 NULL |
| 1518 #endif | 1636 #endif |
| 1519 }; | 1637 }; |
| OLD | NEW |