Index: source/libvpx/vp9/common/vp9_loopfilter.c |
=================================================================== |
--- source/libvpx/vp9/common/vp9_loopfilter.c (revision 292608) |
+++ source/libvpx/vp9/common/vp9_loopfilter.c (working copy) |
@@ -392,6 +392,107 @@ |
} |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, |
+ uint16_t *s, int pitch, |
+ unsigned int mask_16x16_l, |
+ unsigned int mask_8x8_l, |
+ unsigned int mask_4x4_l, |
+ unsigned int mask_4x4_int_l, |
+ const loop_filter_info_n *lfi_n, |
+ const uint8_t *lfl, int bd) { |
+ const int mask_shift = plane_type ? 4 : 8; |
+ const int mask_cutoff = plane_type ? 0xf : 0xff; |
+ const int lfl_forward = plane_type ? 4 : 8; |
+ |
+ unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; |
+ unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; |
+ unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; |
+ unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; |
+ unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; |
+ unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; |
+ unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; |
+ unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; |
+ unsigned int mask; |
+ |
+ for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | |
+ mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; |
+ mask; mask >>= 1) { |
+ const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; |
+ const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); |
+ |
+ // TODO(yunqingwang): count in loopfilter functions should be removed. |
+ if (mask & 1) { |
+ if ((mask_16x16_0 | mask_16x16_1) & 1) { |
+ if ((mask_16x16_0 & mask_16x16_1) & 1) { |
+ vp9_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, bd); |
+ } else if (mask_16x16_0 & 1) { |
+ vp9_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, bd); |
+ } else { |
+ vp9_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, |
+ lfi1->lim, lfi1->hev_thr, bd); |
+ } |
+ } |
+ |
+ if ((mask_8x8_0 | mask_8x8_1) & 1) { |
+ if ((mask_8x8_0 & mask_8x8_1) & 1) { |
+ vp9_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim, |
+ lfi1->hev_thr, bd); |
+ } else if (mask_8x8_0 & 1) { |
+ vp9_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, 1, bd); |
+ } else { |
+ vp9_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, |
+ lfi1->lim, lfi1->hev_thr, 1, bd); |
+ } |
+ } |
+ |
+ if ((mask_4x4_0 | mask_4x4_1) & 1) { |
+ if ((mask_4x4_0 & mask_4x4_1) & 1) { |
+ vp9_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim, |
+ lfi1->hev_thr, bd); |
+ } else if (mask_4x4_0 & 1) { |
+ vp9_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, 1, bd); |
+ } else { |
+ vp9_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, |
+ lfi1->lim, lfi1->hev_thr, 1, bd); |
+ } |
+ } |
+ |
+ if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { |
+ if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { |
+ vp9_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim, |
+ lfi1->hev_thr, bd); |
+ } else if (mask_4x4_int_0 & 1) { |
+ vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, 1, bd); |
+ } else { |
+ vp9_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, |
+ lfi1->lim, lfi1->hev_thr, 1, bd); |
+ } |
+ } |
+ } |
+ |
+ s += 8; |
+ lfl += 1; |
+ mask_16x16_0 >>= 1; |
+ mask_8x8_0 >>= 1; |
+ mask_4x4_0 >>= 1; |
+ mask_4x4_int_0 >>= 1; |
+ mask_16x16_1 >>= 1; |
+ mask_8x8_1 >>= 1; |
+ mask_4x4_1 >>= 1; |
+ mask_4x4_int_1 >>= 1; |
+ } |
+} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
static void filter_selectively_horiz(uint8_t *s, int pitch, |
unsigned int mask_16x16, |
unsigned int mask_8x8, |
@@ -419,7 +520,7 @@ |
} |
} else if (mask_8x8 & 1) { |
if ((mask_8x8 & 3) == 3) { |
- // Next block's thresholds |
+ // Next block's thresholds. |
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); |
vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, |
@@ -448,7 +549,7 @@ |
} |
} else if (mask_4x4 & 1) { |
if ((mask_4x4 & 3) == 3) { |
- // Next block's thresholds |
+ // Next block's thresholds. |
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); |
vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, |
@@ -488,6 +589,112 @@ |
} |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, |
+ unsigned int mask_16x16, |
+ unsigned int mask_8x8, |
+ unsigned int mask_4x4, |
+ unsigned int mask_4x4_int, |
+ const loop_filter_info_n *lfi_n, |
+ const uint8_t *lfl, int bd) { |
+ unsigned int mask; |
+ int count; |
+ |
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; |
+ mask; mask >>= count) { |
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; |
+ |
+ count = 1; |
+ if (mask & 1) { |
+ if (mask_16x16 & 1) { |
+ if ((mask_16x16 & 3) == 3) { |
+ vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 2, bd); |
+ count = 2; |
+ } else { |
+ vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1, bd); |
+ } |
+ } else if (mask_8x8 & 1) { |
+ if ((mask_8x8 & 3) == 3) { |
+ // Next block's thresholds. |
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); |
+ |
+ vp9_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, lfin->mblim, lfin->lim, |
+ lfin->hev_thr, bd); |
+ |
+ if ((mask_4x4_int & 3) == 3) { |
+ vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr, bd); |
+ } else { |
+ if (mask_4x4_int & 1) { |
+ vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1, bd); |
+ } else if (mask_4x4_int & 2) { |
+ vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, |
+ lfin->lim, lfin->hev_thr, 1, bd); |
+ } |
+ } |
+ count = 2; |
+ } else { |
+ vp9_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1, bd); |
+ |
+ if (mask_4x4_int & 1) { |
+ vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1, bd); |
+ } |
+ } |
+ } else if (mask_4x4 & 1) { |
+ if ((mask_4x4 & 3) == 3) { |
+ // Next block's thresholds. |
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); |
+ |
+ vp9_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, lfin->mblim, lfin->lim, |
+ lfin->hev_thr, bd); |
+ if ((mask_4x4_int & 3) == 3) { |
+ vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr, bd); |
+ } else { |
+ if (mask_4x4_int & 1) { |
+ vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1, bd); |
+ } else if (mask_4x4_int & 2) { |
+ vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, |
+ lfin->lim, lfin->hev_thr, 1, bd); |
+ } |
+ } |
+ count = 2; |
+ } else { |
+ vp9_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1, bd); |
+ |
+ if (mask_4x4_int & 1) { |
+ vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1, bd); |
+ } |
+ } |
+ } else if (mask_4x4_int & 1) { |
+ vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1, bd); |
+ } |
+ } |
+ s += 8 * count; |
+ lfl += count; |
+ mask_16x16 >>= count; |
+ mask_8x8 >>= count; |
+ mask_4x4 >>= count; |
+ mask_4x4_int >>= count; |
+ } |
+} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
// This function ors into the current lfm structure, where to do loop |
// filters for the specific mi we are looking at. It uses information |
// including the block_size_type (32x16, 32x32, etc.), the transform size, |
@@ -903,6 +1110,45 @@ |
} |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+static void highbd_filter_selectively_vert(uint16_t *s, int pitch, |
+ unsigned int mask_16x16, |
+ unsigned int mask_8x8, |
+ unsigned int mask_4x4, |
+ unsigned int mask_4x4_int, |
+ const loop_filter_info_n *lfi_n, |
+ const uint8_t *lfl, int bd) { |
+ unsigned int mask; |
+ |
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; |
+ mask; mask >>= 1) { |
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; |
+ |
+ if (mask & 1) { |
+ if (mask_16x16 & 1) { |
+ vp9_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, bd); |
+ } else if (mask_8x8 & 1) { |
+ vp9_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1, bd); |
+ } else if (mask_4x4 & 1) { |
+ vp9_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1, bd); |
+ } |
+ } |
+ if (mask_4x4_int & 1) |
+ vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1, bd); |
+ s += 8; |
+ lfl += 1; |
+ mask_16x16 >>= 1; |
+ mask_8x8 >>= 1; |
+ mask_4x4 >>= 1; |
+ mask_4x4_int >>= 1; |
+ } |
+} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
static void filter_block_plane_non420(VP9_COMMON *cm, |
struct macroblockd_plane *plane, |
MODE_INFO *mi_8x8, |
@@ -909,8 +1155,8 @@ |
int mi_row, int mi_col) { |
const int ss_x = plane->subsampling_x; |
const int ss_y = plane->subsampling_y; |
- const int row_step = 1 << ss_x; |
- const int col_step = 1 << ss_y; |
+ const int row_step = 1 << ss_y; |
+ const int col_step = 1 << ss_x; |
const int row_step_stride = cm->mi_stride * row_step; |
struct buf_2d *const dst = &plane->dst; |
uint8_t* const dst0 = dst->buf; |
@@ -1001,6 +1247,25 @@ |
// Disable filtering on the leftmost column |
border_mask = ~(mi_col == 0); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (cm->use_highbitdepth) { |
+ highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), |
+ dst->stride, |
+ mask_16x16_c & border_mask, |
+ mask_8x8_c & border_mask, |
+ mask_4x4_c & border_mask, |
+ mask_4x4_int[r], |
+ &cm->lf_info, &lfl[r << 3], |
+ (int)cm->bit_depth); |
+ } else { |
+ filter_selectively_vert(dst->buf, dst->stride, |
+ mask_16x16_c & border_mask, |
+ mask_8x8_c & border_mask, |
+ mask_4x4_c & border_mask, |
+ mask_4x4_int[r], |
+ &cm->lf_info, &lfl[r << 3]); |
+ } |
+#else |
filter_selectively_vert(dst->buf, dst->stride, |
mask_16x16_c & border_mask, |
mask_8x8_c & border_mask, |
@@ -1007,6 +1272,7 @@ |
mask_4x4_c & border_mask, |
mask_4x4_int[r], |
&cm->lf_info, &lfl[r << 3]); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
dst->buf += 8 * dst->stride; |
mi_8x8 += row_step_stride; |
} |
@@ -1030,7 +1296,25 @@ |
mask_8x8_r = mask_8x8[r]; |
mask_4x4_r = mask_4x4[r]; |
} |
- |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (cm->use_highbitdepth) { |
+ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), |
+ dst->stride, |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int_r, |
+ &cm->lf_info, &lfl[r << 3], |
+ (int)cm->bit_depth); |
+ } else { |
+ filter_selectively_horiz(dst->buf, dst->stride, |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int_r, |
+ &cm->lf_info, &lfl[r << 3]); |
+ } |
+#else |
filter_selectively_horiz(dst->buf, dst->stride, |
mask_16x16_r, |
mask_8x8_r, |
@@ -1037,6 +1321,7 @@ |
mask_4x4_r, |
mask_4x4_int_r, |
&cm->lf_info, &lfl[r << 3]); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
dst->buf += 8 * dst->stride; |
} |
} |
@@ -1062,7 +1347,29 @@ |
unsigned int mask_4x4_l = mask_4x4 & 0xffff; |
unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; |
- // Disable filtering on the leftmost column |
+ // Disable filtering on the leftmost column. |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (cm->use_highbitdepth) { |
+ highbd_filter_selectively_vert_row2(plane->plane_type, |
+ CONVERT_TO_SHORTPTR(dst->buf), |
+ dst->stride, |
+ mask_16x16_l, |
+ mask_8x8_l, |
+ mask_4x4_l, |
+ mask_4x4_int_l, |
+ &cm->lf_info, &lfm->lfl_y[r << 3], |
+ (int)cm->bit_depth); |
+ } else { |
+ filter_selectively_vert_row2(plane->plane_type, |
+ dst->buf, dst->stride, |
+ mask_16x16_l, |
+ mask_8x8_l, |
+ mask_4x4_l, |
+ mask_4x4_int_l, |
+ &cm->lf_info, |
+ &lfm->lfl_y[r << 3]); |
+ } |
+#else |
filter_selectively_vert_row2(plane->plane_type, |
dst->buf, dst->stride, |
mask_16x16_l, |
@@ -1070,7 +1377,7 @@ |
mask_4x4_l, |
mask_4x4_int_l, |
&cm->lf_info, &lfm->lfl_y[r << 3]); |
- |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
dst->buf += 16 * dst->stride; |
mask_16x16 >>= 16; |
mask_8x8 >>= 16; |
@@ -1100,12 +1407,35 @@ |
mask_4x4_r = mask_4x4 & 0xff; |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (cm->use_highbitdepth) { |
+ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), |
+ dst->stride, |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int & 0xff, |
+ &cm->lf_info, |
+ &lfm->lfl_y[r << 3], |
+ (int)cm->bit_depth); |
+ } else { |
+ filter_selectively_horiz(dst->buf, dst->stride, |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int & 0xff, |
+ &cm->lf_info, |
+ &lfm->lfl_y[r << 3]); |
+ } |
+#else |
filter_selectively_horiz(dst->buf, dst->stride, |
mask_16x16_r, |
mask_8x8_r, |
mask_4x4_r, |
mask_4x4_int & 0xff, |
- &cm->lf_info, &lfm->lfl_y[r << 3]); |
+ &cm->lf_info, |
+ &lfm->lfl_y[r << 3]); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
dst->buf += 8 * dst->stride; |
mask_16x16 >>= 8; |
@@ -1135,7 +1465,30 @@ |
unsigned int mask_4x4_l = mask_4x4 & 0xff; |
unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; |
- // Disable filtering on the leftmost column |
+ // Disable filtering on the leftmost column. |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (cm->use_highbitdepth) { |
+ highbd_filter_selectively_vert_row2(plane->plane_type, |
+ CONVERT_TO_SHORTPTR(dst->buf), |
+ dst->stride, |
+ mask_16x16_l, |
+ mask_8x8_l, |
+ mask_4x4_l, |
+ mask_4x4_int_l, |
+ &cm->lf_info, |
+ &lfm->lfl_uv[r << 1], |
+ (int)cm->bit_depth); |
+ } else { |
+ filter_selectively_vert_row2(plane->plane_type, |
+ dst->buf, dst->stride, |
+ mask_16x16_l, |
+ mask_8x8_l, |
+ mask_4x4_l, |
+ mask_4x4_int_l, |
+ &cm->lf_info, |
+ &lfm->lfl_uv[r << 1]); |
+ } |
+#else |
filter_selectively_vert_row2(plane->plane_type, |
dst->buf, dst->stride, |
mask_16x16_l, |
@@ -1142,7 +1495,9 @@ |
mask_8x8_l, |
mask_4x4_l, |
mask_4x4_int_l, |
- &cm->lf_info, &lfm->lfl_uv[r << 1]); |
+ &cm->lf_info, |
+ &lfm->lfl_uv[r << 1]); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
dst->buf += 16 * dst->stride; |
mask_16x16 >>= 8; |
@@ -1177,12 +1532,35 @@ |
mask_4x4_r = mask_4x4 & 0xf; |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (cm->use_highbitdepth) { |
+ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), |
+ dst->stride, |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int_r, |
+ &cm->lf_info, |
+ &lfm->lfl_uv[r << 1], |
+ (int)cm->bit_depth); |
+ } else { |
+ filter_selectively_horiz(dst->buf, dst->stride, |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int_r, |
+ &cm->lf_info, |
+ &lfm->lfl_uv[r << 1]); |
+ } |
+#else |
filter_selectively_horiz(dst->buf, dst->stride, |
mask_16x16_r, |
mask_8x8_r, |
mask_4x4_r, |
mask_4x4_int_r, |
- &cm->lf_info, &lfm->lfl_uv[r << 1]); |
+ &cm->lf_info, |
+ &lfm->lfl_uv[r << 1]); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
dst->buf += 8 * dst->stride; |
mask_16x16 >>= 4; |