/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <string.h>

#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"

#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_postproc.h"

// TODO(jackychen): Replace this function with SSE2 code. There is an
// SSE2 implementation in vp8, so consider how to share it between vp8
// and vp9.
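// Weighted average of co-located pixels: each output pixel becomes
//   (src * src_weight + dst * dst_weight + rounding_bit) >> MFQE_PRECISION,
// where dst_weight = (1 << MFQE_PRECISION) - src_weight. For example,
// with MFQE_PRECISION equal to 4 the weights sum to 16, so src_weight
// of 12 mixes 12/16 of the source pixel with 4/16 of the destination.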
static void filter_by_weight(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride,
                             int block_size, int src_weight) {
  const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
  const int rounding_bit = 1 << (MFQE_PRECISION - 1);
  int r, c;

  for (r = 0; r < block_size; r++) {
    for (c = 0; c < block_size; c++) {
      dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit)
               >> MFQE_PRECISION;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

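// Apply filter_by_weight to a 32x32 block as four 16x16 quadrants.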
static void filter_by_weight32x32(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 16, weight);
  filter_by_weight(src + 16, src_stride, dst + 16, dst_stride, 16, weight);
  filter_by_weight(src + src_stride * 16, src_stride, dst + dst_stride * 16,
                   dst_stride, 16, weight);
  filter_by_weight(src + src_stride * 16 + 16, src_stride,
                   dst + dst_stride * 16 + 16, dst_stride, 16, weight);
}

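// Apply filter_by_weight to a 64x64 block as four 32x32 quadrants.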
static void filter_by_weight64x64(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
  filter_by_weight32x32(src + 32, src_stride, dst + 32, dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32, src_stride,
                        dst + dst_stride * 32, dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
                        dst + dst_stride * 32 + 32, dst_stride, weight);
}

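// Blend a whole block into the destination: the luma plane at the given
// block size and the chroma planes at half that size (the buffers are
// 4:2:0 subsampled), all with the same source weight.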
static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
                          int yd_stride, const uint8_t *u, const uint8_t *v,
                          int uv_stride, uint8_t *ud, uint8_t *vd,
                          int uvd_stride, BLOCK_SIZE block_size,
                          int weight) {
  if (block_size == BLOCK_16X16) {
    filter_by_weight(y, y_stride, yd, yd_stride, 16, weight);
    filter_by_weight(u, uv_stride, ud, uvd_stride, 8, weight);
    filter_by_weight(v, uv_stride, vd, uvd_stride, 8, weight);
  } else if (block_size == BLOCK_32X32) {
    filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
    filter_by_weight(u, uv_stride, ud, uvd_stride, 16, weight);
    filter_by_weight(v, uv_stride, vd, uvd_stride, 16, weight);
  } else if (block_size == BLOCK_64X64) {
    filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
    filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
    filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
  }
}

// TODO(jackychen): Determine whether to replace this function with
// assembly code.
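// Plain block copies, used when a block is left unfiltered. The 8x8 copy
// serves the chroma planes of a 16x16 luma block.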
static void copy_mem8x8(const uint8_t *src, int src_stride,
                        uint8_t *dst, int dst_stride) {
  int r;
  for (r = 0; r < 8; r++) {
    memcpy(dst, src, 8);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem16x16(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  int r;
  for (r = 0; r < 16; r++) {
    memcpy(dst, src, 16);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem32x32(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  copy_mem16x16(src, src_stride, dst, dst_stride);
  copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
  copy_mem16x16(src + src_stride * 16, src_stride,
                dst + dst_stride * 16, dst_stride);
  copy_mem16x16(src + src_stride * 16 + 16, src_stride,
                dst + dst_stride * 16 + 16, dst_stride);
}

static void copy_mem64x64(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  copy_mem32x32(src, src_stride, dst, dst_stride);
  copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
  copy_mem32x32(src + src_stride * 32, src_stride,
                dst + dst_stride * 32, dst_stride);
  copy_mem32x32(src + src_stride * 32 + 32, src_stride,
                dst + dst_stride * 32 + 32, dst_stride);
}

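// Copy one block, dispatching to the fixed-size helpers; chroma planes
// are copied at half the luma block size.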
static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
                       int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
                       uint8_t *vd, int yd_stride, int uvd_stride,
                       BLOCK_SIZE bs) {
  if (bs == BLOCK_16X16) {
    copy_mem16x16(y, y_stride, yd, yd_stride);
    copy_mem8x8(u, uv_stride, ud, uvd_stride);
    copy_mem8x8(v, uv_stride, vd, uvd_stride);
  } else if (bs == BLOCK_32X32) {
    copy_mem32x32(y, y_stride, yd, yd_stride);
    copy_mem16x16(u, uv_stride, ud, uvd_stride);
    copy_mem16x16(v, uv_stride, vd, uvd_stride);
  } else {
    copy_mem64x64(y, y_stride, yd, yd_stride);
    copy_mem32x32(u, uv_stride, ud, uvd_stride);
    copy_mem32x32(v, uv_stride, vd, uvd_stride);
  }
}

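// Compute the SAD and variance thresholds used in mfqe_block. Both grow
// with qdiff, the base quantizer difference between the current and the
// last frame, and the SAD threshold shrinks slightly as the block size
// grows.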
static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
  const int adj = qdiff >> MFQE_PRECISION;
  if (bs == BLOCK_16X16) {
    *sad_thr = 7 + adj;
  } else if (bs == BLOCK_32X32) {
    *sad_thr = 6 + adj;
  } else {  // BLOCK_64X64
    *sad_thr = 5 + adj;
  }
  *vdiff_thr = 125 + qdiff;
}

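// Filter one block. SAD and variance between the block in the current
// frame (y) and the co-located block in the last decoded frame (yd) are
// normalized to per-pixel averages; the block is then either blended
// into yd with a strength derived from those measurements, or copied
// unchanged from the current frame.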
static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
                       const uint8_t *v, int y_stride, int uv_stride,
                       uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
                       int uvd_stride, int qdiff) {
  int sad, sad_thr, vdiff, vdiff_thr;
  uint32_t sse;

  get_thr(bs, qdiff, &sad_thr, &vdiff_thr);

  if (bs == BLOCK_16X16) {
    vdiff = (vp9_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
    sad = (vp9_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
  } else if (bs == BLOCK_32X32) {
    vdiff = (vp9_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
    sad = (vp9_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
  } else /* if (bs == BLOCK_64X64) */ {
    vdiff = (vp9_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
    sad = (vp9_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
  }

  // Requiring vdiff > sad * 3 ensures that vdiff is not too small relative
  // to sad; a small vdiff with a large sad is typical of a lighting change
  // in a smooth area, where applying MFQE is risky.
  if (sad > 1 && vdiff > sad * 3) {
    const int weight = 1 << MFQE_PRECISION;
    int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
    // When ifactor equals weight, no MFQE is done.
    if (ifactor > weight) {
      ifactor = weight;
    }
    apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
                  uvd_stride, bs, ifactor);
  } else {
    // Copy the block from the current frame (i.e., no MFQE is done).
    copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
               yd_stride, uvd_stride, bs);
  }
}

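// VP9 motion vectors are stored in eighth-pel units, so the squared-length
// threshold of 100 accepts motion up to 10 eighth-pels (1.25 pixels).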
static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
  // Check the motion in the current block (for inter frames), or the motion
  // in the co-located block in the last frame (for keyframes).
  const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
                            mi->mbmi.mv[0].as_mv.row +
                            mi->mbmi.mv[0].as_mv.col *
                            mi->mbmi.mv[0].as_mv.col;
  const int mv_threshold = 100;
  return mi->mbmi.mode >= NEARESTMV &&  // Not an intra block
         cur_bs >= BLOCK_16X16 &&
         mv_len_square <= mv_threshold;
}

// Process each partition in a super block recursively.
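// The offsets below are half the parent block expressed in each unit:
// mi_offset in MODE_INFO units (8 pixels each), y_offset in luma pixels,
// and uv_offset in chroma pixels.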
static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
                           const uint8_t *y, const uint8_t *u,
                           const uint8_t *v, int y_stride, int uv_stride,
                           uint8_t *yd, uint8_t *ud, uint8_t *vd,
                           int yd_stride, int uvd_stride) {
  int mi_offset, y_offset, uv_offset;
  const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
  const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
  const int bsl = b_width_log2_lookup[bs];
  PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
  const BLOCK_SIZE subsize = get_subsize(bs, partition);

  if (cur_bs < BLOCK_8X8) {
    // Blocks smaller than 8x8 can only occur on the frame boundary; skip
    // them.
    return;
  }
  // MFQE is not applied below 16x16: at 16x16, stop recursing and treat
  // the block as a single partition.
  if (bs == BLOCK_16X16) {
    partition = PARTITION_NONE;
  }
  if (bs == BLOCK_64X64) {
    mi_offset = 4;
    y_offset = 32;
    uv_offset = 16;
  } else {
    mi_offset = 2;
    y_offset = 16;
    uv_offset = 8;
  }
  switch (partition) {
    BLOCK_SIZE mfqe_bs, bs_tmp;
    case PARTITION_HORZ:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_64X32;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_32X16;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
                   y_stride, uv_stride, yd + y_offset, ud + uv_offset,
                   vd + uv_offset, yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride,
                   uv_stride, yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset,
                   yd_stride, uvd_stride, qdiff);
      }
      break;
    case PARTITION_VERT:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_32X64;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_16X32;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
                   y_stride, uv_stride, yd + y_offset, ud + uv_offset,
                   vd + uv_offset, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride,
                   uv_stride, yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset,
                   yd_stride, uvd_stride, qdiff);
      }
      break;
    case PARTITION_NONE:
      if (mfqe_decision(mi, cur_bs)) {
        // Do mfqe on this partition.
        mfqe_block(cur_bs, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
      } else {
        // Copy the block from the current frame (i.e., no MFQE is done).
        copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
                   yd_stride, uvd_stride, bs);
      }
      break;
    case PARTITION_SPLIT:
      // Recurse into the four square sub-partitions, e.g. if bs is
      // BLOCK_64X64, process the four 32X32 blocks inside it.
      mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
                     yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
                     v + uv_offset, y_stride, uv_stride, yd + y_offset,
                     ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
                     y + y_offset * y_stride, u + uv_offset * uv_stride,
                     v + uv_offset * uv_stride, y_stride, uv_stride,
                     yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                     vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset,
                     subsize, y + y_offset * y_stride + y_offset,
                     u + uv_offset * uv_stride + uv_offset,
                     v + uv_offset * uv_stride + uv_offset, y_stride,
                     uv_stride, yd + y_offset * yd_stride + y_offset,
                     ud + uv_offset * uvd_stride + uv_offset,
                     vd + uv_offset * uvd_stride + uv_offset,
                     yd_stride, uvd_stride);
      break;
    default:
      assert(0);
  }
}

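// Entry point: run MFQE over every 64x64 super block of the frame,
// blending the current decoded frame into the post-processing buffer.
// In libvpx this is typically reached from the post-processing path
// (e.g. vp9_post_proc_frame) when MFQE is enabled; the exact call site
// may differ between versions.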
void vp9_mfqe(VP9_COMMON *cm) {
  int mi_row, mi_col;
  // The current decoded frame.
  const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
  // The last decoded frame, which also receives the MFQE result.
  YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
  // Loop through each super block.
  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      MODE_INFO *mi;
      MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
      // Motion info from the last frame.
      MODE_INFO *mi_prev = cm->postproc_state.prev_mi +
                           (mi_row * cm->mi_stride + mi_col);
      const uint32_t y_stride = show->y_stride;
      const uint32_t uv_stride = show->uv_stride;
      const uint32_t yd_stride = dest->y_stride;
      const uint32_t uvd_stride = dest->uv_stride;
      const uint32_t row_offset_y = mi_row << 3;
      const uint32_t row_offset_uv = mi_row << 2;
      const uint32_t col_offset_y = mi_col << 3;
      const uint32_t col_offset_uv = mi_col << 2;
      const uint8_t *y = show->y_buffer + row_offset_y * y_stride +
                         col_offset_y;
      const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride +
                         col_offset_uv;
      const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride +
                         col_offset_uv;
      uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
      uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride +
                    col_offset_uv;
      uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride +
                    col_offset_uv;
      if (frame_is_intra_only(cm)) {
        mi = mi_prev;
      } else {
        mi = mi_local;
      }
      mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
                     vd, yd_stride, uvd_stride);
    }
  }
}