/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <string.h>

#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"

#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_postproc.h"

// TODO(jackychen): Replace this function with SSE2 code. There is an
// SSE2 implementation in vp8, so consider how to share it between vp8
// and vp9.
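// Weighted average of co-located pixels: each output pixel becomes
//   (src * src_weight + dst * dst_weight + rounding_bit) >> MFQE_PRECISION,
// where dst_weight = (1 << MFQE_PRECISION) - src_weight. For example,
// with MFQE_PRECISION equal to 4 the weights sum to 16, so src_weight
// of 12 mixes 12/16 of the source pixel with 4/16 of the destination.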
static void filter_by_weight(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride,
                             int block_size, int src_weight) {
  const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
  const int rounding_bit = 1 << (MFQE_PRECISION - 1);
  int r, c;

  for (r = 0; r < block_size; r++) {
    for (c = 0; c < block_size; c++) {
      dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit)
               >> MFQE_PRECISION;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

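// Apply filter_by_weight to a 32x32 block as four 16x16 quadrants.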
static void filter_by_weight32x32(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 16, weight);
  filter_by_weight(src + 16, src_stride, dst + 16, dst_stride, 16, weight);
  filter_by_weight(src + src_stride * 16, src_stride, dst + dst_stride * 16,
                   dst_stride, 16, weight);
  filter_by_weight(src + src_stride * 16 + 16, src_stride,
                   dst + dst_stride * 16 + 16, dst_stride, 16, weight);
}

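// Apply filter_by_weight to a 64x64 block as four 32x32 quadrants.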
static void filter_by_weight64x64(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
  filter_by_weight32x32(src + 32, src_stride, dst + 32, dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32, src_stride,
                        dst + dst_stride * 32, dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
                        dst + dst_stride * 32 + 32, dst_stride, weight);
}

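// Blend a whole block into the destination: the luma plane at the given
// block size and the chroma planes at half that size (the buffers are
// 4:2:0 subsampled), all with the same source weight.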
static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
                          int yd_stride, const uint8_t *u, const uint8_t *v,
                          int uv_stride, uint8_t *ud, uint8_t *vd,
                          int uvd_stride, BLOCK_SIZE block_size,
                          int weight) {
  if (block_size == BLOCK_16X16) {
    filter_by_weight(y, y_stride, yd, yd_stride, 16, weight);
    filter_by_weight(u, uv_stride, ud, uvd_stride, 8, weight);
    filter_by_weight(v, uv_stride, vd, uvd_stride, 8, weight);
  } else if (block_size == BLOCK_32X32) {
    filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
    filter_by_weight(u, uv_stride, ud, uvd_stride, 16, weight);
    filter_by_weight(v, uv_stride, vd, uvd_stride, 16, weight);
  } else if (block_size == BLOCK_64X64) {
    filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
    filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
    filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
  }
}

// TODO(jackychen): Determine whether to replace this function with
// assembly code.
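// Plain block copies, used when a block is left unfiltered. The 8x8 copy
// serves the chroma planes of a 16x16 luma block.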
static void copy_mem8x8(const uint8_t *src, int src_stride,
                        uint8_t *dst, int dst_stride) {
  int r;
  for (r = 0; r < 8; r++) {
    memcpy(dst, src, 8);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem16x16(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  int r;
  for (r = 0; r < 16; r++) {
    memcpy(dst, src, 16);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem32x32(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  copy_mem16x16(src, src_stride, dst, dst_stride);
  copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
  copy_mem16x16(src + src_stride * 16, src_stride,
                dst + dst_stride * 16, dst_stride);
  copy_mem16x16(src + src_stride * 16 + 16, src_stride,
                dst + dst_stride * 16 + 16, dst_stride);
}

static void copy_mem64x64(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  copy_mem32x32(src, src_stride, dst, dst_stride);
  copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
  copy_mem32x32(src + src_stride * 32, src_stride,
                dst + dst_stride * 32, dst_stride);
  copy_mem32x32(src + src_stride * 32 + 32, src_stride,
                dst + dst_stride * 32 + 32, dst_stride);
}

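// Copy one block, dispatching to the fixed-size helpers; chroma planes
// are copied at half the luma block size.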
static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
                       int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
                       uint8_t *vd, int yd_stride, int uvd_stride,
                       BLOCK_SIZE bs) {
  if (bs == BLOCK_16X16) {
    copy_mem16x16(y, y_stride, yd, yd_stride);
    copy_mem8x8(u, uv_stride, ud, uvd_stride);
    copy_mem8x8(v, uv_stride, vd, uvd_stride);
  } else if (bs == BLOCK_32X32) {
    copy_mem32x32(y, y_stride, yd, yd_stride);
    copy_mem16x16(u, uv_stride, ud, uvd_stride);
    copy_mem16x16(v, uv_stride, vd, uvd_stride);
  } else {
    copy_mem64x64(y, y_stride, yd, yd_stride);
    copy_mem32x32(u, uv_stride, ud, uvd_stride);
    copy_mem32x32(v, uv_stride, vd, uvd_stride);
  }
}

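// Compute the SAD and variance thresholds used in mfqe_block. Both grow
// with qdiff, the base quantizer difference between the current and the
// last frame, and the SAD threshold shrinks slightly as the block size
// grows.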
static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
  const int adj = qdiff >> MFQE_PRECISION;
  if (bs == BLOCK_16X16) {
    *sad_thr = 7 + adj;
  } else if (bs == BLOCK_32X32) {
    *sad_thr = 6 + adj;
  } else {  // BLOCK_64X64
    *sad_thr = 5 + adj;
  }
  *vdiff_thr = 125 + qdiff;
}

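// Filter one block. SAD and variance between the block in the current
// frame (y) and the co-located block in the last decoded frame (yd) are
// normalized to per-pixel averages; the block is then either blended
// into yd with a strength derived from those measurements, or copied
// unchanged from the current frame.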
static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
                       const uint8_t *v, int y_stride, int uv_stride,
                       uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
                       int uvd_stride, int qdiff) {
  int sad, sad_thr, vdiff, vdiff_thr;
  uint32_t sse;

  get_thr(bs, qdiff, &sad_thr, &vdiff_thr);

  if (bs == BLOCK_16X16) {
    vdiff = (vp9_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
    sad = (vp9_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
  } else if (bs == BLOCK_32X32) {
    vdiff = (vp9_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
    sad = (vp9_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
  } else /* if (bs == BLOCK_64X64) */ {
    vdiff = (vp9_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
    sad = (vp9_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
  }

  // Requiring vdiff > sad * 3 ensures that vdiff is not too small relative
  // to sad; a small vdiff with a large sad is typical of a lighting change
  // in a smooth area, where applying MFQE is risky.
  if (sad > 1 && vdiff > sad * 3) {
    const int weight = 1 << MFQE_PRECISION;
    int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
    // When ifactor equals weight, no MFQE is done.
    if (ifactor > weight) {
      ifactor = weight;
    }
    apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
                  uvd_stride, bs, ifactor);
  } else {
    // Copy the block from the current frame (i.e., no MFQE is done).
    copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
               yd_stride, uvd_stride, bs);
  }
}

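// VP9 motion vectors are stored in eighth-pel units, so the squared-length
// threshold of 100 accepts motion up to 10 eighth-pels (1.25 pixels).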
static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
  // Check the motion in the current block (for inter frames), or the motion
  // in the co-located block in the last frame (for keyframes).
  const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
                            mi->mbmi.mv[0].as_mv.row +
                            mi->mbmi.mv[0].as_mv.col *
                            mi->mbmi.mv[0].as_mv.col;
  const int mv_threshold = 100;
  return mi->mbmi.mode >= NEARESTMV &&  // Not an intra block
         cur_bs >= BLOCK_16X16 &&
         mv_len_square <= mv_threshold;
}

// Process each partition in a super block recursively.
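// The offsets below are half the parent block expressed in each unit:
// mi_offset in MODE_INFO units (8 pixels each), y_offset in luma pixels,
// and uv_offset in chroma pixels.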
static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
                           const uint8_t *y, const uint8_t *u,
                           const uint8_t *v, int y_stride, int uv_stride,
                           uint8_t *yd, uint8_t *ud, uint8_t *vd,
                           int yd_stride, int uvd_stride) {
  int mi_offset, y_offset, uv_offset;
  const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
  const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
  const int bsl = b_width_log2_lookup[bs];
  PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
  const BLOCK_SIZE subsize = get_subsize(bs, partition);

  if (cur_bs < BLOCK_8X8) {
    // Blocks smaller than 8x8 can only occur on the frame boundary; skip
    // them.
    return;
  }
  // MFQE is not applied below 16x16: at 16x16, stop recursing and treat
  // the block as a single partition.
  if (bs == BLOCK_16X16) {
    partition = PARTITION_NONE;
  }
  if (bs == BLOCK_64X64) {
    mi_offset = 4;
    y_offset = 32;
    uv_offset = 16;
  } else {
    mi_offset = 2;
    y_offset = 16;
    uv_offset = 8;
  }
  switch (partition) {
    BLOCK_SIZE mfqe_bs, bs_tmp;
    case PARTITION_HORZ:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_64X32;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_32X16;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
                   y_stride, uv_stride, yd + y_offset, ud + uv_offset,
                   vd + uv_offset, yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride,
                   uv_stride, yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset,
                   yd_stride, uvd_stride, qdiff);
      }
      break;
    case PARTITION_VERT:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_32X64;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_16X32;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
                   y_stride, uv_stride, yd + y_offset, ud + uv_offset,
                   vd + uv_offset, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride,
                   uv_stride, yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset,
                   yd_stride, uvd_stride, qdiff);
      }
      break;
    case PARTITION_NONE:
      if (mfqe_decision(mi, cur_bs)) {
        // Do mfqe on this partition.
        mfqe_block(cur_bs, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
      } else {
        // Copy the block from the current frame (i.e., no MFQE is done).
        copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
                   yd_stride, uvd_stride, bs);
      }
      break;
    case PARTITION_SPLIT:
      // Recurse into the four square sub-partitions, e.g. if bs is
      // BLOCK_64X64, process the four 32X32 blocks inside it.
      mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
                     yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
                     v + uv_offset, y_stride, uv_stride, yd + y_offset,
                     ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
                     y + y_offset * y_stride, u + uv_offset * uv_stride,
                     v + uv_offset * uv_stride, y_stride, uv_stride,
                     yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                     vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset,
                     subsize, y + y_offset * y_stride + y_offset,
                     u + uv_offset * uv_stride + uv_offset,
                     v + uv_offset * uv_stride + uv_offset, y_stride,
                     uv_stride, yd + y_offset * yd_stride + y_offset,
                     ud + uv_offset * uvd_stride + uv_offset,
                     vd + uv_offset * uvd_stride + uv_offset,
                     yd_stride, uvd_stride);
      break;
    default:
      assert(0);
  }
}

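// Entry point: run MFQE over every 64x64 super block of the frame,
// blending the current decoded frame into the post-processing buffer.
// In libvpx this is typically reached from the post-processing path
// (e.g. vp9_post_proc_frame) when MFQE is enabled; the exact call site
// may differ between versions.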
void vp9_mfqe(VP9_COMMON *cm) {
  int mi_row, mi_col;
  // The current decoded frame.
  const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
  // The last decoded frame, which also receives the MFQE result.
  YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
  // Loop through each super block.
  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      MODE_INFO *mi;
      MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
      // Motion info from the last frame.
      MODE_INFO *mi_prev = cm->postproc_state.prev_mi +
                           (mi_row * cm->mi_stride + mi_col);
      const uint32_t y_stride = show->y_stride;
      const uint32_t uv_stride = show->uv_stride;
      const uint32_t yd_stride = dest->y_stride;
      const uint32_t uvd_stride = dest->uv_stride;
      const uint32_t row_offset_y = mi_row << 3;
      const uint32_t row_offset_uv = mi_row << 2;
      const uint32_t col_offset_y = mi_col << 3;
      const uint32_t col_offset_uv = mi_col << 2;
      const uint8_t *y = show->y_buffer + row_offset_y * y_stride +
                         col_offset_y;
      const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride +
                         col_offset_uv;
      const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride +
                         col_offset_uv;
      uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
      uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride +
                    col_offset_uv;
      uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride +
                    col_offset_uv;
      if (frame_is_intra_only(cm)) {
        mi = mi_prev;
      } else {
        mi = mi_local;
      }
      mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
                     vd, yd_stride, uvd_stride);
    }
  }
}