Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(40)

Side by Side Diff: src/opts/SkBlurImageFilter_opts.h

Issue 1412793009: SkBlurImageFilter_opts: optimize NEON box_blur_double in separate loops. (Closed) Base URL: https://skia.googlesource.com/skia.git@blur-neon-separate-loops
Patch Set: Upload with correct upstream Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkBlurImageFilter_opts_DEFINED 8 #ifndef SkBlurImageFilter_opts_DEFINED
9 #define SkBlurImageFilter_opts_DEFINED 9 #define SkBlurImageFilter_opts_DEFINED
10 10
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 #define INCREMENT_SUMS(c) sum = _mm_add_epi32(sum, expand(c)) 60 #define INCREMENT_SUMS(c) sum = _mm_add_epi32(sum, expand(c))
61 #define DECREMENT_SUMS(c) sum = _mm_sub_epi32(sum, expand(c)) 61 #define DECREMENT_SUMS(c) sum = _mm_sub_epi32(sum, expand(c))
62 #define STORE_SUMS \ 62 #define STORE_SUMS \
63 auto result = mullo_epi32(sum, scale); \ 63 auto result = mullo_epi32(sum, scale); \
64 result = _mm_add_epi32(result, half); \ 64 result = _mm_add_epi32(result, half); \
65 *dptr = repack(result); 65 *dptr = repack(result);
66 #define DOUBLE_ROW_OPTIMIZATION 66 #define DOUBLE_ROW_OPTIMIZATION
67 67
68 #elif defined(SK_ARM_HAS_NEON) 68 #elif defined(SK_ARM_HAS_NEON)
69 69
70 // val = (sum * scale * 2 + 0x8000) >> 16
71 #define STORE_SUMS_DOUBLE \
mtklein 2015/10/28 22:31:17 This just symmetry or are you thinking of expandin
Stephen White 2015/10/28 22:38:33 Well, mostly because it's repeated four times belo
72 uint16x8_t resultPixels = vreinterpretq_u16_s16(vqrdmulhq_s16( \
73 vreinterpretq_s16_u16(sum), vreinterpretq_s16_u16(scale))); \
74 if (dstDirection == BlurDirection::kX) { \
75 uint32x2_t px2 = vreinterpret_u32_u8(vmovn_u16(resultPixels)); \
76 vst1_lane_u32(dptr + 0, px2, 0); \
77 vst1_lane_u32(dptr + width, px2, 1); \
78 } else { \
79 vst1_u8((uint8_t*)dptr, vmovn_u16(resultPixels)); \
80 }
81
82 #define INCREMENT_SUMS_DOUBLE(p) sum = vaddw_u8(sum, load_2_pixels(p))
83 #define DECREMENT_SUMS_DOUBLE(p) sum = vsubw_u8(sum, load_2_pixels(p))
84
70 // Fast path for kernel sizes between 2 and 127, working on two rows at a time. 85 // Fast path for kernel sizes between 2 and 127, working on two rows at a time.
71 template<BlurDirection srcDirection, BlurDirection dstDirection> 86 template<BlurDirection srcDirection, BlurDirection dstDirection>
72 void box_blur_double(const SkPMColor** src, int srcStride, SkPMColor** dst, int kernelSize, 87 void box_blur_double(const SkPMColor** src, int srcStride, SkPMColor** dst, int kernelSize,
73 int leftOffset, int rightOffset, int width, int* height) { 88 int leftOffset, int rightOffset, int width, int* height) {
74 // Load 2 pixels from adjacent rows. 89 // Load 2 pixels from adjacent rows.
75 auto load_2_pixels = [&](const SkPMColor* s) { 90 auto load_2_pixels = [&](const SkPMColor* s) {
76 if (srcDirection == BlurDirection::kX) { 91 if (srcDirection == BlurDirection::kX) {
77 // 10% faster by adding these 2 prefetches 92 // 10% faster by adding these 2 prefetches
78 SK_PREFETCH(s + 16); 93 SK_PREFETCH(s + 16);
79 SK_PREFETCH(s + 16 + srcStride); 94 SK_PREFETCH(s + 16 + srcStride);
80 auto one = vld1_lane_u32(s + 0, vdup_n_u32(0), 0), 95 auto one = vld1_lane_u32(s + 0, vdup_n_u32(0), 0),
81 two = vld1_lane_u32(s + srcStride, one, 1); 96 two = vld1_lane_u32(s + srcStride, one, 1);
82 return vreinterpret_u8_u32(two); 97 return vreinterpret_u8_u32(two);
83 } else { 98 } else {
84 return vld1_u8((uint8_t*)s); 99 return vld1_u8((uint8_t*)s);
85 } 100 }
86 }; 101 };
87 const int rightBorder = SkMin32(rightOffset + 1, width); 102 int incrementStart = SkMax32(-rightOffset - 1, -width);
103 int incrementEnd = SkMax32(width - rightOffset - 1, 0);
104 int decrementStart = SkMin32(leftOffset, width);
88 const int srcStrideX = srcDirection == BlurDirection::kX ? 1 : srcStride; 105 const int srcStrideX = srcDirection == BlurDirection::kX ? 1 : srcStride;
89 const int dstStrideX = dstDirection == BlurDirection::kX ? 1 : *height; 106 const int dstStrideX = dstDirection == BlurDirection::kX ? 1 : *height;
90 const int srcStrideY = srcDirection == BlurDirection::kX ? srcStride : 1; 107 const int srcStrideY = srcDirection == BlurDirection::kX ? srcStride : 1;
91 const int dstStrideY = dstDirection == BlurDirection::kX ? width : 1; 108 const int dstStrideY = dstDirection == BlurDirection::kX ? width : 1;
92 const uint16x8_t scale = vdupq_n_u16((1 << 15) / kernelSize); 109 const uint16x8_t scale = vdupq_n_u16((1 << 15) / kernelSize);
93 110
94 for (; *height >= 2; *height -= 2) { 111 for (; *height >= 2; *height -= 2) {
95 uint16x8_t sum = vdupq_n_u16(0); 112 uint16x8_t sum = vdupq_n_u16(0);
96 const SkPMColor* p = *src; 113 const SkPMColor* lptr = *src;
97 for (int i = 0; i < rightBorder; i++) { 114 const SkPMColor* rptr = *src;
98 sum = vaddw_u8(sum, load_2_pixels(p)); 115 SkPMColor* dptr = *dst;
99 p += srcStrideX; 116 int x;
117 for (x = incrementStart; x < 0; ++x) {
118 INCREMENT_SUMS_DOUBLE(rptr);
119 rptr += srcStrideX;
100 } 120 }
101 121 for (; x < decrementStart && x < incrementEnd; ++x) {
102 const SkPMColor* sptr = *src; 122 STORE_SUMS_DOUBLE
103 SkPMColor* dptr = *dst;
104 for (int x = 0; x < width; x++) {
105 // val = (sum * scale * 2 + 0x8000) >> 16
106 uint16x8_t resultPixels = vreinterpretq_u16_s16(vqrdmulhq_s16(
107 vreinterpretq_s16_u16(sum), vreinterpretq_s16_u16(scale)));
108 if (dstDirection == BlurDirection::kX) {
109 uint32x2_t px2 = vreinterpret_u32_u8(vmovn_u16(resultPixels));
110 vst1_lane_u32(dptr + 0, px2, 0);
111 vst1_lane_u32(dptr + width, px2, 1);
112 } else {
113 vst1_u8((uint8_t*)dptr, vmovn_u16(resultPixels));
114 }
115
116 if (x >= leftOffset) {
117 sum = vsubw_u8(sum, load_2_pixels(sptr - leftOffset * srcStrideX));
118 }
119 if (x + rightOffset + 1 < width) {
120 sum = vaddw_u8(sum, load_2_pixels(sptr + (rightOffset + 1) * srcStrideX));
121 }
122 sptr += srcStrideX;
123 dptr += dstStrideX; 123 dptr += dstStrideX;
124 INCREMENT_SUMS_DOUBLE(rptr);
125 rptr += srcStrideX;
126 }
127 for (x = decrementStart; x < incrementEnd; ++x) {
128 STORE_SUMS_DOUBLE
129 dptr += dstStrideX;
130 INCREMENT_SUMS_DOUBLE(rptr);
131 rptr += srcStrideX;
132 DECREMENT_SUMS_DOUBLE(lptr);
133 lptr += srcStrideX;
134 }
135 for (x = incrementEnd; x < decrementStart; ++x) {
136 STORE_SUMS_DOUBLE
137 dptr += dstStrideX;
138 }
139 for (; x < width; ++x) {
140 STORE_SUMS_DOUBLE
141 dptr += dstStrideX;
142 DECREMENT_SUMS_DOUBLE(lptr);
143 lptr += srcStrideX;
124 } 144 }
125 *src += srcStrideY * 2; 145 *src += srcStrideY * 2;
126 *dst += dstStrideY * 2; 146 *dst += dstStrideY * 2;
127 } 147 }
128 } 148 }
129 149
130 // ARGB -> 0A0R 0G0B 150 // ARGB -> 0A0R 0G0B
131 static inline uint16x4_t expand(SkPMColor p) { 151 static inline uint16x4_t expand(SkPMColor p) {
132 return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(p)))); 152 return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(p))));
133 }; 153 };
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
236 } 256 }
237 } 257 }
238 258
239 static auto box_blur_xx = &box_blur<BlurDirection::kX, BlurDirection::kX>, 259 static auto box_blur_xx = &box_blur<BlurDirection::kX, BlurDirection::kX>,
240 box_blur_xy = &box_blur<BlurDirection::kX, BlurDirection::kY>, 260 box_blur_xy = &box_blur<BlurDirection::kX, BlurDirection::kY>,
241 box_blur_yx = &box_blur<BlurDirection::kY, BlurDirection::kX>; 261 box_blur_yx = &box_blur<BlurDirection::kY, BlurDirection::kX>;
242 262
243 } // namespace SK_OPTS_NS 263 } // namespace SK_OPTS_NS
244 264
245 #endif 265 #endif
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698