Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1567)

Side by Side Diff: source/libvpx/vp8/encoder/arm/neon/denoising_neon.c

Issue 341293003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp8/common/x86/variance_ssse3.c ('k') | source/libvpx/vp8/encoder/bitstream.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 int mc_running_avg_y_stride, 49 int mc_running_avg_y_stride,
50 unsigned char *running_avg_y, 50 unsigned char *running_avg_y,
51 int running_avg_y_stride, 51 int running_avg_y_stride,
52 unsigned char *sig, int sig_stride, 52 unsigned char *sig, int sig_stride,
53 unsigned int motion_magnitude, 53 unsigned int motion_magnitude,
54 int increase_denoising) { 54 int increase_denoising) {
55 /* If motion_magnitude is small, making the denoiser more aggressive by 55 /* If motion_magnitude is small, making the denoiser more aggressive by
56 * increasing the adjustment for each level, level1 adjustment is 56 * increasing the adjustment for each level, level1 adjustment is
57 * increased, the deltas stay the same. 57 * increased, the deltas stay the same.
58 */ 58 */
59 const uint8x16_t v_level1_adjustment = vdupq_n_u8( 59 int shift_inc = (increase_denoising &&
60 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 : 3); 60 motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
61 const uint8x16_t v_level1_adjustment = vmovq_n_u8(
62 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3);
61 const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); 63 const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1);
62 const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); 64 const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2);
63 const uint8x16_t v_level1_threshold = vdupq_n_u8(4); 65 const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc);
64 const uint8x16_t v_level2_threshold = vdupq_n_u8(8); 66 const uint8x16_t v_level2_threshold = vdupq_n_u8(8);
65 const uint8x16_t v_level3_threshold = vdupq_n_u8(16); 67 const uint8x16_t v_level3_threshold = vdupq_n_u8(16);
66 int64x2_t v_sum_diff_total = vdupq_n_s64(0); 68 int64x2_t v_sum_diff_total = vdupq_n_s64(0);
67 69
68 /* Go over lines. */ 70 /* Go over lines. */
69 int i; 71 int r;
70 for (i = 0; i < 16; ++i) { 72 for (r = 0; r < 16; ++r) {
71 /* Load inputs. */ 73 /* Load inputs. */
72 const uint8x16_t v_sig = vld1q_u8(sig); 74 const uint8x16_t v_sig = vld1q_u8(sig);
73 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); 75 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y);
74 76
75 /* Calculate absolute difference and sign masks. */ 77 /* Calculate absolute difference and sign masks. */
76 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); 78 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y);
77 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); 79 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y);
78 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); 80 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y);
79 81
80 /* Figure out which level that put us in. */ 82 /* Figure out which level that put us in. */
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
136 } 138 }
137 139
138 /* Update pointers for next iteration. */ 140 /* Update pointers for next iteration. */
139 sig += sig_stride; 141 sig += sig_stride;
140 mc_running_avg_y += mc_running_avg_y_stride; 142 mc_running_avg_y += mc_running_avg_y_stride;
141 running_avg_y += running_avg_y_stride; 143 running_avg_y += running_avg_y_stride;
142 } 144 }
143 145
144 /* Too many adjustments => copy block. */ 146 /* Too many adjustments => copy block. */
145 { 147 {
146 const int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), 148 int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),
147 vget_low_s64(v_sum_diff_total)); 149 vget_low_s64(v_sum_diff_total));
148 const int s0 = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); 150 int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
151 int sum_diff_thresh = SUM_DIFF_THRESHOLD;
149 152
150 if (s0 > SUM_DIFF_THRESHOLD) 153 if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
154 if (sum_diff > sum_diff_thresh) {
155 // Before returning to copy the block (i.e., apply no denoising),
156 // check if we can still apply some (weaker) temporal filtering to
157 // this block, that would otherwise not be denoised at all. Simplest
158 // is to apply an additional adjustment to running_avg_y to bring it
159 // closer to sig. The adjustment is capped by a maximum delta, and
160 // chosen such that in most cases the resulting sum_diff will be
161 // within the acceptable range given by sum_diff_thresh.
162
163 // The delta is set by the excess of absolute pixel diff over the
164 // threshold.
165 int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1;
166 // Only apply the adjustment for max delta up to 3.
167 if (delta < 4) {
168 const uint8x16_t k_delta = vmovq_n_u8(delta);
169 sig -= sig_stride * 16;
170 mc_running_avg_y -= mc_running_avg_y_stride * 16;
171 running_avg_y -= running_avg_y_stride * 16;
172 for (r = 0; r < 16; ++r) {
173 uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y);
174 const uint8x16_t v_sig = vld1q_u8(sig);
175 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y);
176
177 /* Calculate absolute difference and sign masks. */
178 const uint8x16_t v_abs_diff = vabdq_u8(v_sig,
179 v_mc_running_avg_y);
180 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig,
181 v_mc_running_avg_y);
182 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig,
183 v_mc_running_avg_y);
184 // Clamp absolute difference to delta to get the adjustment.
185 const uint8x16_t v_abs_adjustment =
186 vminq_u8(v_abs_diff, (k_delta));
187
188 const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask,
189 v_abs_adjustment);
190 const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask,
191 v_abs_adjustment);
192
193 v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment);
194 v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment);
195
196 /* Store results. */
197 vst1q_u8(running_avg_y, v_running_avg_y);
198
199 {
200 const int8x16_t v_sum_diff =
201 vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment),
202 vreinterpretq_s8_u8(v_pos_adjustment));
203
204 const int16x8_t fe_dc_ba_98_76_54_32_10 =
205 vpaddlq_s8(v_sum_diff);
206 const int32x4_t fedc_ba98_7654_3210 =
207 vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
208 const int64x2_t fedcba98_76543210 =
209 vpaddlq_s32(fedc_ba98_7654_3210);
210
211 v_sum_diff_total = vqaddq_s64(v_sum_diff_total,
212 fedcba98_76543210);
213 }
214 /* Update pointers for next iteration. */
215 sig += sig_stride;
216 mc_running_avg_y += mc_running_avg_y_stride;
217 running_avg_y += running_avg_y_stride;
218 }
219 {
220 // Update the sum of all pixel differences of this MB.
221 x = vqadd_s64(vget_high_s64(v_sum_diff_total),
222 vget_low_s64(v_sum_diff_total));
223 sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
224
225 if (sum_diff > sum_diff_thresh) {
226 return COPY_BLOCK;
227 }
228 }
229 } else {
151 return COPY_BLOCK; 230 return COPY_BLOCK;
231 }
232 }
152 } 233 }
153 234
154 /* Tell above level that block was filtered. */ 235 /* Tell above level that block was filtered. */
155 running_avg_y -= running_avg_y_stride * 16; 236 running_avg_y -= running_avg_y_stride * 16;
156 sig -= sig_stride * 16; 237 sig -= sig_stride * 16;
157 238
158 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride); 239 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride);
159 240
160 return FILTER_BLOCK; 241 return FILTER_BLOCK;
161 } 242 }
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/x86/variance_ssse3.c ('k') | source/libvpx/vp8/encoder/bitstream.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698