Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkBlurImageFilter_opts_DEFINED | 8 #ifndef SkBlurImageFilter_opts_DEFINED |
| 9 #define SkBlurImageFilter_opts_DEFINED | 9 #define SkBlurImageFilter_opts_DEFINED |
| 10 | 10 |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 60 #define INCREMENT_SUMS(c) sum = _mm_add_epi32(sum, expand(c)) | 60 #define INCREMENT_SUMS(c) sum = _mm_add_epi32(sum, expand(c)) |
| 61 #define DECREMENT_SUMS(c) sum = _mm_sub_epi32(sum, expand(c)) | 61 #define DECREMENT_SUMS(c) sum = _mm_sub_epi32(sum, expand(c)) |
| 62 #define STORE_SUMS \ | 62 #define STORE_SUMS \ |
| 63 auto result = mullo_epi32(sum, scale); \ | 63 auto result = mullo_epi32(sum, scale); \ |
| 64 result = _mm_add_epi32(result, half); \ | 64 result = _mm_add_epi32(result, half); \ |
| 65 *dptr = repack(result); | 65 *dptr = repack(result); |
| 66 #define DOUBLE_ROW_OPTIMIZATION | 66 #define DOUBLE_ROW_OPTIMIZATION |
| 67 | 67 |
| 68 #elif defined(SK_ARM_HAS_NEON) | 68 #elif defined(SK_ARM_HAS_NEON) |
| 69 | 69 |
| 70 // val = (sum * scale * 2 + 0x8000) >> 16 | |
| 71 #define STORE_SUMS_DOUBLE \ | |
|
mtklein
2015/10/28 22:31:17
This just symmetry or are you thinking of expanding…
Stephen White
2015/10/28 22:38:33
Well, mostly because it's repeated four times below…
| |
| 72 uint16x8_t resultPixels = vreinterpretq_u16_s16(vqrdmulhq_s16( \ | |
| 73 vreinterpretq_s16_u16(sum), vreinterpretq_s16_u16(scale))); \ | |
| 74 if (dstDirection == BlurDirection::kX) { \ | |
| 75 uint32x2_t px2 = vreinterpret_u32_u8(vmovn_u16(resultPixels)); \ | |
| 76 vst1_lane_u32(dptr + 0, px2, 0); \ | |
| 77 vst1_lane_u32(dptr + width, px2, 1); \ | |
| 78 } else { \ | |
| 79 vst1_u8((uint8_t*)dptr, vmovn_u16(resultPixels)); \ | |
| 80 } | |
| 81 | |
| 82 #define INCREMENT_SUMS_DOUBLE(p) sum = vaddw_u8(sum, load_2_pixels(p)) | |
| 83 #define DECREMENT_SUMS_DOUBLE(p) sum = vsubw_u8(sum, load_2_pixels(p)) | |
| 84 | |
| 70 // Fast path for kernel sizes between 2 and 127, working on two rows at a time. | 85 // Fast path for kernel sizes between 2 and 127, working on two rows at a time. |
| 71 template<BlurDirection srcDirection, BlurDirection dstDirection> | 86 template<BlurDirection srcDirection, BlurDirection dstDirection> |
| 72 void box_blur_double(const SkPMColor** src, int srcStride, SkPMColor** dst, int kernelSize, | 87 void box_blur_double(const SkPMColor** src, int srcStride, SkPMColor** dst, int kernelSize, |
| 73 int leftOffset, int rightOffset, int width, int* height) { | 88 int leftOffset, int rightOffset, int width, int* height) { |
| 74 // Load 2 pixels from adjacent rows. | 89 // Load 2 pixels from adjacent rows. |
| 75 auto load_2_pixels = [&](const SkPMColor* s) { | 90 auto load_2_pixels = [&](const SkPMColor* s) { |
| 76 if (srcDirection == BlurDirection::kX) { | 91 if (srcDirection == BlurDirection::kX) { |
| 77 // 10% faster by adding these 2 prefetches | 92 // 10% faster by adding these 2 prefetches |
| 78 SK_PREFETCH(s + 16); | 93 SK_PREFETCH(s + 16); |
| 79 SK_PREFETCH(s + 16 + srcStride); | 94 SK_PREFETCH(s + 16 + srcStride); |
| 80 auto one = vld1_lane_u32(s + 0, vdup_n_u32(0), 0), | 95 auto one = vld1_lane_u32(s + 0, vdup_n_u32(0), 0), |
| 81 two = vld1_lane_u32(s + srcStride, one, 1); | 96 two = vld1_lane_u32(s + srcStride, one, 1); |
| 82 return vreinterpret_u8_u32(two); | 97 return vreinterpret_u8_u32(two); |
| 83 } else { | 98 } else { |
| 84 return vld1_u8((uint8_t*)s); | 99 return vld1_u8((uint8_t*)s); |
| 85 } | 100 } |
| 86 }; | 101 }; |
| 87 const int rightBorder = SkMin32(rightOffset + 1, width); | 102 int incrementStart = SkMax32(-rightOffset - 1, -width); |
| 103 int incrementEnd = SkMax32(width - rightOffset - 1, 0); | |
| 104 int decrementStart = SkMin32(leftOffset, width); | |
| 88 const int srcStrideX = srcDirection == BlurDirection::kX ? 1 : srcStride; | 105 const int srcStrideX = srcDirection == BlurDirection::kX ? 1 : srcStride; |
| 89 const int dstStrideX = dstDirection == BlurDirection::kX ? 1 : *height; | 106 const int dstStrideX = dstDirection == BlurDirection::kX ? 1 : *height; |
| 90 const int srcStrideY = srcDirection == BlurDirection::kX ? srcStride : 1; | 107 const int srcStrideY = srcDirection == BlurDirection::kX ? srcStride : 1; |
| 91 const int dstStrideY = dstDirection == BlurDirection::kX ? width : 1; | 108 const int dstStrideY = dstDirection == BlurDirection::kX ? width : 1; |
| 92 const uint16x8_t scale = vdupq_n_u16((1 << 15) / kernelSize); | 109 const uint16x8_t scale = vdupq_n_u16((1 << 15) / kernelSize); |
| 93 | 110 |
| 94 for (; *height >= 2; *height -= 2) { | 111 for (; *height >= 2; *height -= 2) { |
| 95 uint16x8_t sum = vdupq_n_u16(0); | 112 uint16x8_t sum = vdupq_n_u16(0); |
| 96 const SkPMColor* p = *src; | 113 const SkPMColor* lptr = *src; |
| 97 for (int i = 0; i < rightBorder; i++) { | 114 const SkPMColor* rptr = *src; |
| 98 sum = vaddw_u8(sum, load_2_pixels(p)); | 115 SkPMColor* dptr = *dst; |
| 99 p += srcStrideX; | 116 int x; |
| 117 for (x = incrementStart; x < 0; ++x) { | |
| 118 INCREMENT_SUMS_DOUBLE(rptr); | |
| 119 rptr += srcStrideX; | |
| 100 } | 120 } |
| 101 | 121 for (; x < decrementStart && x < incrementEnd; ++x) { |
| 102 const SkPMColor* sptr = *src; | 122 STORE_SUMS_DOUBLE |
| 103 SkPMColor* dptr = *dst; | |
| 104 for (int x = 0; x < width; x++) { | |
| 105 // val = (sum * scale * 2 + 0x8000) >> 16 | |
| 106 uint16x8_t resultPixels = vreinterpretq_u16_s16(vqrdmulhq_s16( | |
| 107 vreinterpretq_s16_u16(sum), vreinterpretq_s16_u16(scale))); | |
| 108 if (dstDirection == BlurDirection::kX) { | |
| 109 uint32x2_t px2 = vreinterpret_u32_u8(vmovn_u16(resultPixels)); | |
| 110 vst1_lane_u32(dptr + 0, px2, 0); | |
| 111 vst1_lane_u32(dptr + width, px2, 1); | |
| 112 } else { | |
| 113 vst1_u8((uint8_t*)dptr, vmovn_u16(resultPixels)); | |
| 114 } | |
| 115 | |
| 116 if (x >= leftOffset) { | |
| 117 sum = vsubw_u8(sum, load_2_pixels(sptr - leftOffset * srcStrideX)); | |
| 118 } | |
| 119 if (x + rightOffset + 1 < width) { | |
| 120 sum = vaddw_u8(sum, load_2_pixels(sptr + (rightOffset + 1) * srcStrideX)); | |
| 121 } | |
| 122 sptr += srcStrideX; | |
| 123 dptr += dstStrideX; | 123 dptr += dstStrideX; |
| 124 INCREMENT_SUMS_DOUBLE(rptr); | |
| 125 rptr += srcStrideX; | |
| 126 } | |
| 127 for (x = decrementStart; x < incrementEnd; ++x) { | |
| 128 STORE_SUMS_DOUBLE | |
| 129 dptr += dstStrideX; | |
| 130 INCREMENT_SUMS_DOUBLE(rptr); | |
| 131 rptr += srcStrideX; | |
| 132 DECREMENT_SUMS_DOUBLE(lptr); | |
| 133 lptr += srcStrideX; | |
| 134 } | |
| 135 for (x = incrementEnd; x < decrementStart; ++x) { | |
| 136 STORE_SUMS_DOUBLE | |
| 137 dptr += dstStrideX; | |
| 138 } | |
| 139 for (; x < width; ++x) { | |
| 140 STORE_SUMS_DOUBLE | |
| 141 dptr += dstStrideX; | |
| 142 DECREMENT_SUMS_DOUBLE(lptr); | |
| 143 lptr += srcStrideX; | |
| 124 } | 144 } |
| 125 *src += srcStrideY * 2; | 145 *src += srcStrideY * 2; |
| 126 *dst += dstStrideY * 2; | 146 *dst += dstStrideY * 2; |
| 127 } | 147 } |
| 128 } | 148 } |
| 129 | 149 |
| 130 // ARGB -> 0A0R 0G0B | 150 // ARGB -> 0A0R 0G0B |
| 131 static inline uint16x4_t expand(SkPMColor p) { | 151 static inline uint16x4_t expand(SkPMColor p) { |
| 132 return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(p)))); | 152 return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(p)))); |
| 133 }; | 153 }; |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 236 } | 256 } |
| 237 } | 257 } |
| 238 | 258 |
| 239 static auto box_blur_xx = &box_blur<BlurDirection::kX, BlurDirection::kX>, | 259 static auto box_blur_xx = &box_blur<BlurDirection::kX, BlurDirection::kX>, |
| 240 box_blur_xy = &box_blur<BlurDirection::kX, BlurDirection::kY>, | 260 box_blur_xy = &box_blur<BlurDirection::kX, BlurDirection::kY>, |
| 241 box_blur_yx = &box_blur<BlurDirection::kY, BlurDirection::kX>; | 261 box_blur_yx = &box_blur<BlurDirection::kY, BlurDirection::kX>; |
| 242 | 262 |
| 243 } // namespace SK_OPTS_NS | 263 } // namespace SK_OPTS_NS |
| 244 | 264 |
| 245 #endif | 265 #endif |
| OLD | NEW |