OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 int mc_running_avg_y_stride, | 49 int mc_running_avg_y_stride, |
50 unsigned char *running_avg_y, | 50 unsigned char *running_avg_y, |
51 int running_avg_y_stride, | 51 int running_avg_y_stride, |
52 unsigned char *sig, int sig_stride, | 52 unsigned char *sig, int sig_stride, |
53 unsigned int motion_magnitude, | 53 unsigned int motion_magnitude, |
54 int increase_denoising) { | 54 int increase_denoising) { |
55 /* If motion_magnitude is small, making the denoiser more aggressive by | 55 /* If motion_magnitude is small, making the denoiser more aggressive by |
56 * increasing the adjustment for each level, level1 adjustment is | 56 * increasing the adjustment for each level, level1 adjustment is |
57 * increased, the deltas stay the same. | 57 * increased, the deltas stay the same. |
58 */ | 58 */ |
59 const uint8x16_t v_level1_adjustment = vdupq_n_u8( | 59 int shift_inc = (increase_denoising && |
60 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 : 3); | 60 motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; |
| 61 const uint8x16_t v_level1_adjustment = vmovq_n_u8( |
| 62 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3); |
61 const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); | 63 const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); |
62 const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); | 64 const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); |
63 const uint8x16_t v_level1_threshold = vdupq_n_u8(4); | 65 const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); |
64 const uint8x16_t v_level2_threshold = vdupq_n_u8(8); | 66 const uint8x16_t v_level2_threshold = vdupq_n_u8(8); |
65 const uint8x16_t v_level3_threshold = vdupq_n_u8(16); | 67 const uint8x16_t v_level3_threshold = vdupq_n_u8(16); |
66 int64x2_t v_sum_diff_total = vdupq_n_s64(0); | 68 int64x2_t v_sum_diff_total = vdupq_n_s64(0); |
67 | 69 |
68 /* Go over lines. */ | 70 /* Go over lines. */ |
69 int i; | 71 int r; |
70 for (i = 0; i < 16; ++i) { | 72 for (r = 0; r < 16; ++r) { |
71 /* Load inputs. */ | 73 /* Load inputs. */ |
72 const uint8x16_t v_sig = vld1q_u8(sig); | 74 const uint8x16_t v_sig = vld1q_u8(sig); |
73 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); | 75 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); |
74 | 76 |
75 /* Calculate absolute difference and sign masks. */ | 77 /* Calculate absolute difference and sign masks. */ |
76 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); | 78 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); |
77 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); | 79 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); |
78 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); | 80 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); |
79 | 81 |
80 /* Figure out which level that put us in. */ | 82 /* Figure out which level that put us in. */ |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
136 } | 138 } |
137 | 139 |
138 /* Update pointers for next iteration. */ | 140 /* Update pointers for next iteration. */ |
139 sig += sig_stride; | 141 sig += sig_stride; |
140 mc_running_avg_y += mc_running_avg_y_stride; | 142 mc_running_avg_y += mc_running_avg_y_stride; |
141 running_avg_y += running_avg_y_stride; | 143 running_avg_y += running_avg_y_stride; |
142 } | 144 } |
143 | 145 |
144 /* Too much adjustments => copy block. */ | 146 /* Too much adjustments => copy block. */ |
145 { | 147 { |
146 const int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), | 148 int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), |
147 vget_low_s64(v_sum_diff_total)); | 149 vget_low_s64(v_sum_diff_total)); |
148 const int s0 = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); | 150 int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); |
| 151 int sum_diff_thresh = SUM_DIFF_THRESHOLD; |
149 | 152 |
150 if (s0 > SUM_DIFF_THRESHOLD) | 153 if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; |
| 154 if (sum_diff > sum_diff_thresh) { |
| 155 // Before returning to copy the block (i.e., apply no denoising), |
 | 156 // check if we can still apply some (weaker) temporal filtering to |
| 157 // this block, that would otherwise not be denoised at all. Simplest |
| 158 // is to apply an additional adjustment to running_avg_y to bring it |
| 159 // closer to sig. The adjustment is capped by a maximum delta, and |
| 160 // chosen such that in most cases the resulting sum_diff will be |
 | 161 // within the acceptable range given by sum_diff_thresh. |
| 162 |
| 163 // The delta is set by the excess of absolute pixel diff over the |
| 164 // threshold. |
| 165 int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; |
| 166 // Only apply the adjustment for max delta up to 3. |
| 167 if (delta < 4) { |
| 168 const uint8x16_t k_delta = vmovq_n_u8(delta); |
| 169 sig -= sig_stride * 16; |
| 170 mc_running_avg_y -= mc_running_avg_y_stride * 16; |
| 171 running_avg_y -= running_avg_y_stride * 16; |
| 172 for (r = 0; r < 16; ++r) { |
| 173 uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); |
| 174 const uint8x16_t v_sig = vld1q_u8(sig); |
| 175 const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); |
| 176 |
| 177 /* Calculate absolute difference and sign masks. */ |
| 178 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, |
| 179 v_mc_running_avg_y); |
| 180 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, |
| 181 v_mc_running_avg_y); |
| 182 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, |
| 183 v_mc_running_avg_y); |
| 184 // Clamp absolute difference to delta to get the adjustment. |
| 185 const uint8x16_t v_abs_adjustment = |
| 186 vminq_u8(v_abs_diff, (k_delta)); |
| 187 |
| 188 const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, |
| 189 v_abs_adjustment); |
| 190 const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, |
| 191 v_abs_adjustment); |
| 192 |
| 193 v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); |
| 194 v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); |
| 195 |
| 196 /* Store results. */ |
| 197 vst1q_u8(running_avg_y, v_running_avg_y); |
| 198 |
| 199 { |
| 200 const int8x16_t v_sum_diff = |
| 201 vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), |
| 202 vreinterpretq_s8_u8(v_pos_adjustment)); |
| 203 |
| 204 const int16x8_t fe_dc_ba_98_76_54_32_10 = |
| 205 vpaddlq_s8(v_sum_diff); |
| 206 const int32x4_t fedc_ba98_7654_3210 = |
| 207 vpaddlq_s16(fe_dc_ba_98_76_54_32_10); |
| 208 const int64x2_t fedcba98_76543210 = |
| 209 vpaddlq_s32(fedc_ba98_7654_3210); |
| 210 |
| 211 v_sum_diff_total = vqaddq_s64(v_sum_diff_total, |
| 212 fedcba98_76543210); |
| 213 } |
| 214 /* Update pointers for next iteration. */ |
| 215 sig += sig_stride; |
| 216 mc_running_avg_y += mc_running_avg_y_stride; |
| 217 running_avg_y += running_avg_y_stride; |
| 218 } |
| 219 { |
| 220 // Update the sum of all pixel differences of this MB. |
| 221 x = vqadd_s64(vget_high_s64(v_sum_diff_total), |
| 222 vget_low_s64(v_sum_diff_total)); |
| 223 sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); |
| 224 |
| 225 if (sum_diff > sum_diff_thresh) { |
| 226 return COPY_BLOCK; |
| 227 } |
| 228 } |
| 229 } else { |
151 return COPY_BLOCK; | 230 return COPY_BLOCK; |
| 231 } |
| 232 } |
152 } | 233 } |
153 | 234 |
154 /* Tell above level that block was filtered. */ | 235 /* Tell above level that block was filtered. */ |
155 running_avg_y -= running_avg_y_stride * 16; | 236 running_avg_y -= running_avg_y_stride * 16; |
156 sig -= sig_stride * 16; | 237 sig -= sig_stride * 16; |
157 | 238 |
158 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride); | 239 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride); |
159 | 240 |
160 return FILTER_BLOCK; | 241 return FILTER_BLOCK; |
161 } | 242 } |
OLD | NEW |