OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 int mc_running_avg_y_stride, | 49 int mc_running_avg_y_stride, |
50 unsigned char *running_avg_y, | 50 unsigned char *running_avg_y, |
51 int running_avg_y_stride, | 51 int running_avg_y_stride, |
52 unsigned char *sig, int sig_stride, | 52 unsigned char *sig, int sig_stride, |
53 unsigned int motion_magnitude, | 53 unsigned int motion_magnitude, |
54 int increase_denoising) { | 54 int increase_denoising) { |
55 /* If motion_magnitude is small, making the denoiser more aggressive by | 55 /* If motion_magnitude is small, making the denoiser more aggressive by |
56 * increasing the adjustment for each level, level1 adjustment is | 56 * increasing the adjustment for each level, level1 adjustment is |
57 * increased, the deltas stay the same. | 57 * increased, the deltas stay the same. |
58 */ | 58 */ |
59 const uint8x16_t v_level1_adjustment = vdupq_n_u8( | 59 int shift_inc = (increase_denoising && |
60 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 : 3); | 60 motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; |
| 61 const uint8x16_t v_level1_adjustment = vmovq_n_u8( |
| 62 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3); |
61 const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); | 63 const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); |
62 const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); | 64 const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); |
63 const uint8x16_t v_level1_threshold = vdupq_n_u8(4); | 65 const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); |
64 const uint8x16_t v_level2_threshold = vdupq_n_u8(8); | 66 const uint8x16_t v_level2_threshold = vdupq_n_u8(8); |
65 const uint8x16_t v_level3_threshold = vdupq_n_u8(16); | 67 const uint8x16_t v_level3_threshold = vdupq_n_u8(16); |
66 int64x2_t v_sum_diff_total = vdupq_n_s64(0); | 68 int64x2_t v_sum_diff_total = vdupq_n_s64(0); |
67 | 69 |
68 /* Go over lines. */ | 70 /* Go over lines. */ |
69 int i; | 71 int r; |
70 for (i = 0; i < 16; ++i) { | 72 for (r = 0; r < 16; ++r) { |
71 /* Load inputs. */ | 73 /* Load inputs. */ |
72 const uint8x16_t v_sig = vld1q_u8(sig); | 74 const uint8x16_t v_sig = vld1q_u8(sig); |
73 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); | 75 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); |
74 | 76 |
75 /* Calculate absolute difference and sign masks. */ | 77 /* Calculate absolute difference and sign masks. */ |
76 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); | 78 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); |
77 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); | 79 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); |
78 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); | 80 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); |
79 | 81 |
80 /* Figure out which level that put us in. */ | 82 /* Figure out which level that put us in. */ |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
136 } | 138 } |
137 | 139 |
138 /* Update pointers for next iteration. */ | 140 /* Update pointers for next iteration. */ |
139 sig += sig_stride; | 141 sig += sig_stride; |
140 mc_running_avg_y += mc_running_avg_y_stride; | 142 mc_running_avg_y += mc_running_avg_y_stride; |
141 running_avg_y += running_avg_y_stride; | 143 running_avg_y += running_avg_y_stride; |
142 } | 144 } |
143 | 145 |
144 /* Too much adjustments => copy block. */ | 146 /* Too much adjustments => copy block. */ |
145 { | 147 { |
146 const int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), | 148 int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), |
147 vget_low_s64(v_sum_diff_total)); | 149 vget_low_s64(v_sum_diff_total)); |
148 const int s0 = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); | 150 int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); |
| 151 int sum_diff_thresh = SUM_DIFF_THRESHOLD; |
149 | 152 |
150 if (s0 > SUM_DIFF_THRESHOLD) | 153 if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; |
| 154 if (sum_diff > sum_diff_thresh) { |
| 155 // Before returning to copy the block (i.e., apply no denoising), |
 | 156 // check if we can still apply some (weaker) temporal filtering to |
| 157 // this block, that would otherwise not be denoised at all. Simplest |
| 158 // is to apply an additional adjustment to running_avg_y to bring it |
| 159 // closer to sig. The adjustment is capped by a maximum delta, and |
| 160 // chosen such that in most cases the resulting sum_diff will be |
 | 161 // within the acceptable range given by sum_diff_thresh. |
| 162 |
| 163 // The delta is set by the excess of absolute pixel diff over the |
| 164 // threshold. |
| 165 int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; |
| 166 // Only apply the adjustment for max delta up to 3. |
| 167 if (delta < 4) { |
| 168 const uint8x16_t k_delta = vmovq_n_u8(delta); |
| 169 sig -= sig_stride * 16; |
| 170 mc_running_avg_y -= mc_running_avg_y_stride * 16; |
| 171 running_avg_y -= running_avg_y_stride * 16; |
| 172 for (r = 0; r < 16; ++r) { |
| 173 uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); |
| 174 const uint8x16_t v_sig = vld1q_u8(sig); |
| 175 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); |
| 176 |
| 177 /* Calculate absolute difference and sign masks. */ |
| 178 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, |
| 179 v_mc_running_avg_y); |
| 180 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, |
| 181 v_mc_running_avg_y); |
| 182 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, |
| 183 v_mc_running_avg_y); |
| 184 // Clamp absolute difference to delta to get the adjustment. |
| 185 const uint8x16_t v_abs_adjustment = |
| 186 vminq_u8(v_abs_diff, (k_delta)); |
| 187 |
| 188 const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, |
| 189 v_abs_adjustment); |
| 190 const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, |
| 191 v_abs_adjustment); |
| 192 |
| 193 v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); |
| 194 v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); |
| 195 |
| 196 /* Store results. */ |
| 197 vst1q_u8(running_avg_y, v_running_avg_y); |
| 198 |
| 199 { |
| 200 const int8x16_t v_sum_diff = |
| 201 vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), |
| 202 vreinterpretq_s8_u8(v_pos_adjustment)); |
| 203 |
| 204 const int16x8_t fe_dc_ba_98_76_54_32_10 = |
| 205 vpaddlq_s8(v_sum_diff); |
| 206 const int32x4_t fedc_ba98_7654_3210 = |
| 207 vpaddlq_s16(fe_dc_ba_98_76_54_32_10); |
| 208 const int64x2_t fedcba98_76543210 = |
| 209 vpaddlq_s32(fedc_ba98_7654_3210); |
| 210 |
| 211 v_sum_diff_total = vqaddq_s64(v_sum_diff_total, |
| 212 fedcba98_76543210); |
| 213 } |
| 214 /* Update pointers for next iteration. */ |
| 215 sig += sig_stride; |
| 216 mc_running_avg_y += mc_running_avg_y_stride; |
| 217 running_avg_y += running_avg_y_stride; |
| 218 } |
| 219 { |
| 220 // Update the sum of all pixel differences of this MB. |
| 221 x = vqadd_s64(vget_high_s64(v_sum_diff_total), |
| 222 vget_low_s64(v_sum_diff_total)); |
| 223 sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); |
| 224 |
| 225 if (sum_diff > sum_diff_thresh) { |
| 226 return COPY_BLOCK; |
| 227 } |
| 228 } |
| 229 } else { |
151 return COPY_BLOCK; | 230 return COPY_BLOCK; |
| 231 } |
| 232 } |
152 } | 233 } |
153 | 234 |
154 /* Tell above level that block was filtered. */ | 235 /* Tell above level that block was filtered. */ |
155 running_avg_y -= running_avg_y_stride * 16; | 236 running_avg_y -= running_avg_y_stride * 16; |
156 sig -= sig_stride * 16; | 237 sig -= sig_stride * 16; |
157 | 238 |
158 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride); | 239 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride); |
159 | 240 |
160 return FILTER_BLOCK; | 241 return FILTER_BLOCK; |
161 } | 242 } |
OLD | NEW |