source/libvpx/vp8/encoder/arm/neon/denoising_neon.c - Issue 375983002: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp8/encoder/arm/neon/denoising_neon.c

Issue 375983002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 222 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
233 }	233 }

234	234

235 /* Tell above level that block was filtered. */	235 /* Tell above level that block was filtered. */

236 running_avg_y -= running_avg_y_stride * 16;	236 running_avg_y -= running_avg_y_stride * 16;

237 sig -= sig_stride * 16;	237 sig -= sig_stride * 16;

238	238

239 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride);	239 vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride);

240	240

241 return FILTER_BLOCK;	241 return FILTER_BLOCK;

242 }	242 }

	243

	244 int vp8_denoiser_filter_uv_neon(unsigned char *mc_running_avg,

	245 int mc_running_avg_stride,

	246 unsigned char *running_avg,

	247 int running_avg_stride,

	248 unsigned char *sig, int sig_stride,

	249 unsigned int motion_magnitude,

	250 int increase_denoising) {

	251 /* If motion_magnitude is small, make the denoiser more aggressive by

	252 * increasing the adjustment for each level: the level1 adjustment is

	253 * increased, while the deltas stay the same.

	254 */

	255 int shift_inc = (increase_denoising &&

	256 motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0;

	257 const uint8x16_t v_level1_adjustment = vmovq_n_u8(

	258 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 4 + shift_inc : 3) ;

	259

	260 const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1);

	261 const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2);

	262 const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc);

	263 const uint8x16_t v_level2_threshold = vdupq_n_u8(8);

	264 const uint8x16_t v_level3_threshold = vdupq_n_u8(16);

	265 int64x2_t v_sum_diff_total = vdupq_n_s64(0);

	266 int r;

	267

	268 {

	269 uint16x4_t v_sum_block = vdup_n_u16(0);

	270

	271 // Avoid denoising color signal if it's close to average level.

	272 for (r = 0; r < 8; ++r) {

	273 const uint8x8_t v_sig = vld1_u8(sig);

	274 const uint16x4_t _76_54_32_10 = vpaddl_u8(v_sig);

	275 v_sum_block = vqadd_u16(v_sum_block, _76_54_32_10);

	276 sig += sig_stride;

	277 }

	278 sig -= sig_stride * 8;

	279 {

	280 const uint32x2_t _7654_3210 = vpaddl_u16(v_sum_block);

	281 const uint64x1_t _76543210 = vpaddl_u32(_7654_3210);

	282 const unsigned int sum_block =

	283 vget_lane_u32(vreinterpret_u32_u64(_76543210), 0);

	284 if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {

	285 return COPY_BLOCK;

	286 }

	287 }

	288 }

	289

	290 /* Go over lines. */

	291 for (r = 0; r < 4; ++r) {

	292 /* Load inputs. */

	293 const uint8x8_t v_sig_lo = vld1_u8(sig);

	294 const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]);

	295 const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi);

	296 const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg);

	297 const uint8x8_t v_mc_running_avg_hi =

	298 vld1_u8(&mc_running_avg[mc_running_avg_stride]);

	299 const uint8x16_t v_mc_running_avg =

	300 vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi);

	301 /* Calculate absolute difference and sign masks. */

	302 const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg);

	303 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg);

	304 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg);

	305

	306 /* Figure out which level that puts us in. */

	307 const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold,

	308 v_abs_diff);

	309 const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold,

	310 v_abs_diff);

	311 const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold,

	312 v_abs_diff);

	313

	314 /* Calculate absolute adjustments for level 1, 2 and 3. */

	315 const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask,

	316 v_delta_level_1_and_2);

	317 const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask,

	318 v_delta_level_2_and_3);

	319 const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment,

	320 v_level2_adjustment);

	321 const uint8x16_t v_level1and2and3_adjustment = vaddq_u8(

	322 v_level1and2_adjustment, v_level3_adjustment);

	323

	324 /* Figure adjustment absolute value by selecting between the absolute

	325 * difference if in level0 or the value for level 1, 2 and 3.

	326 */

	327 const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask,

	328 v_level1and2and3_adjustment, v_abs_diff);

	329

	330 /* Calculate positive and negative adjustments. Apply them to the signal

	331 * and accumulate them. Adjustments are less than eight and the maximum

	332 * sum of them (7 * 16) can fit in a signed char.

	333 */

	334 const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask,

	335 v_abs_adjustment);

	336 const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask,

	337 v_abs_adjustment);

	338

	339 uint8x16_t v_running_avg = vqaddq_u8(v_sig, v_pos_adjustment);

	340 v_running_avg = vqsubq_u8(v_running_avg, v_neg_adjustment);

	341

	342 /* Store results. */

	343 vst1_u8(running_avg, vget_low_u8(v_running_avg));

	344 vst1_u8(&running_avg[running_avg_stride], vget_high_u8(v_running_avg));

	345

	346 /* Sum all the accumulators to have the sum of all pixel differences

	347 * for this macroblock.

	348 */

	349 {

	350 const int8x16_t v_sum_diff =

	351 vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment),

	352 vreinterpretq_s8_u8(v_neg_adjustment));

	353

	354 const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff);

	355

	356 const int32x4_t fedc_ba98_7654_3210 =

	357 vpaddlq_s16(fe_dc_ba_98_76_54_32_10);

	358

	359 const int64x2_t fedcba98_76543210 =

	360 vpaddlq_s32(fedc_ba98_7654_3210);

	361

	362 v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210);

	363 }

	364

	365 /* Update pointers for next iteration. */

	366 sig += sig_stride * 2;

	367 mc_running_avg += mc_running_avg_stride * 2;

	368 running_avg += running_avg_stride * 2;

	369 }

	370

	371

	372 /* Too many adjustments => copy block. */

	373 {

	374 int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),

	375 vget_low_s64(v_sum_diff_total));

	376 int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);

	377 int sum_diff_thresh = SUM_DIFF_THRESHOLD_UV;

	378 if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;

	379 if (sum_diff > sum_diff_thresh) {

	380 // Before returning to copy the block (i.e., apply no denoising),

	381 // check if we can still apply some (weaker) temporal filtering to

	382 // this block, that would otherwise not be denoised at all. Simplest

	383 // is to apply an additional adjustment to running_avg_y to bring it

	384 // closer to sig. The adjustment is capped by a maximum delta, and

	385 // chosen such that in most cases the resulting sum_diff will be

	386 // within the acceptable range given by sum_diff_thresh.

	387

	388 // The delta is set by the excess of absolute pixel diff over the

	389 // threshold.

	390 int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1;

	391 // Only apply the adjustment for max delta up to 3.

	392 if (delta < 4) {

	393 const uint8x16_t k_delta = vmovq_n_u8(delta);

	394 sig -= sig_stride * 8;

	395 mc_running_avg -= mc_running_avg_stride * 8;

	396 running_avg -= running_avg_stride * 8;

	397 for (r = 0; r < 4; ++r) {

	398 const uint8x8_t v_sig_lo = vld1_u8(sig);

	399 const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]);

	400 const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi);

	401 const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg);

	402 const uint8x8_t v_mc_running_avg_hi =

	403 vld1_u8(&mc_running_avg[mc_running_avg_stride]);

	404 const uint8x16_t v_mc_running_avg =

	405 vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi);

	406 /* Calculate absolute difference and sign masks. */

	407 const uint8x16_t v_abs_diff = vabdq_u8(v_sig,

	408 v_mc_running_avg);

	409 const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig,

	410 v_mc_running_avg);

	411 const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig,

	412 v_mc_running_avg);

	413 // Clamp absolute difference to delta to get the adjustment.

	414 const uint8x16_t v_abs_adjustment =

	415 vminq_u8(v_abs_diff, (k_delta));

	416

	417 const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask,

	418 v_abs_adjustment);

	419 const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask,

	420 v_abs_adjustment);

	421 const uint8x8_t v_running_avg_lo = vld1_u8(running_avg);

	422 const uint8x8_t v_running_avg_hi =

	423 vld1_u8(&running_avg[running_avg_stride]);

	424 uint8x16_t v_running_avg =

	425 vcombine_u8(v_running_avg_lo, v_running_avg_hi);

	426

	427 v_running_avg = vqsubq_u8(v_running_avg, v_pos_adjustment);

	428 v_running_avg = vqaddq_u8(v_running_avg, v_neg_adjustment);

	429

	430 /* Store results. */

	431 vst1_u8(running_avg, vget_low_u8(v_running_avg));

	432 vst1_u8(&running_avg[running_avg_stride],

	433 vget_high_u8(v_running_avg));

	434

	435 {

	436 const int8x16_t v_sum_diff =

	437 vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment),

	438 vreinterpretq_s8_u8(v_pos_adjustment));

	439

	440 const int16x8_t fe_dc_ba_98_76_54_32_10 =

	441 vpaddlq_s8(v_sum_diff);

	442 const int32x4_t fedc_ba98_7654_3210 =

	443 vpaddlq_s16(fe_dc_ba_98_76_54_32_10);

	444 const int64x2_t fedcba98_76543210 =

	445 vpaddlq_s32(fedc_ba98_7654_3210);

	446

	447 v_sum_diff_total = vqaddq_s64(v_sum_diff_total,

	448 fedcba98_76543210);

	449 }

	450 /* Update pointers for next iteration. */

	451 sig += sig_stride * 2;

	452 mc_running_avg += mc_running_avg_stride * 2;

	453 running_avg += running_avg_stride * 2;

	454 }

	455 {

	456 // Update the sum of all pixel differences of this MB.

	457 x = vqadd_s64(vget_high_s64(v_sum_diff_total),

	458 vget_low_s64(v_sum_diff_total));

	459 sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);

	460

	461 if (sum_diff > sum_diff_thresh) {

	462 return COPY_BLOCK;

	463 }

	464 }

	465 } else {

	466 return COPY_BLOCK;

	467 }

	468 }

	469 }

	470

	471 /* Tell above level that block was filtered. */

	472 running_avg -= running_avg_stride * 8;

	473 sig -= sig_stride * 8;

	474

	475 vp8_copy_mem8x8(running_avg, running_avg_stride, sig, sig_stride);

	476

	477 return FILTER_BLOCK;

	478 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp8/common/rtcd_defs.pl ('k') | source/libvpx/vp8/encoder/bitstream.h » ('j') | no next file with comments »