Index: source/libvpx/vp9/common/vp9_loopfilter.c |
=================================================================== |
--- source/libvpx/vp9/common/vp9_loopfilter.c (revision 240950) |
+++ source/libvpx/vp9/common/vp9_loopfilter.c (working copy) |
@@ -32,6 +32,8 @@ |
uint16_t left_uv[TX_SIZES]; |
uint16_t above_uv[TX_SIZES]; |
uint16_t int_4x4_uv; |
+ uint8_t lfl_y[64]; |
+ uint8_t lfl_uv[16]; |
} LOOP_FILTER_MASK; |
// 64 bit masks for left transform size. Each 1 represents a position where |
@@ -281,10 +283,10 @@ |
// n_shift is the multiplier for lf_deltas
// the multiplier is 1 when filter_lvl is between 0 and 31;
// 2 when filter_lvl is between 32 and 63
- const int n_shift = default_filt_lvl >> 5; |
+ const int scale = 1 << (default_filt_lvl >> 5); |
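+ // e.g. default_filt_lvl = 40: 40 >> 5 == 1, so scale == 2;
+ // any level in 0..31 gives scale == 1.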
loop_filter_info_n *const lfi = &cm->lf_info; |
struct loopfilter *const lf = &cm->lf; |
- struct segmentation *const seg = &cm->seg; |
+ const struct segmentation *const seg = &cm->seg; |
// update limits if sharpness has changed |
if (lf->last_sharpness_level != lf->sharpness_level) { |
@@ -293,9 +295,7 @@ |
} |
for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { |
- int lvl_seg = default_filt_lvl, ref, mode, intra_lvl; |
- |
- // Set the baseline filter values for each segment |
+ int lvl_seg = default_filt_lvl; |
if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { |
const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); |
lvl_seg = seg->abs_delta == SEGMENT_ABSDATA |
@@ -307,77 +307,118 @@ |
// we could get rid of this if we assume that deltas are set to |
// zero when not in use; encoder always uses deltas |
vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); |
- continue; |
- } |
+ } else { |
+ int ref, mode; |
+ const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; |
+ lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); |
- intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * (1 << n_shift); |
- lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); |
- |
- for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) |
- for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { |
- const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * (1 << n_shift) |
- + lf->mode_deltas[mode] * (1 << n_shift); |
- lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); |
+ for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { |
+ for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { |
+ const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale |
+ + lf->mode_deltas[mode] * scale; |
+ lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); |
+ } |
} |
+ } |
} |
} |
-static int build_lfi(const loop_filter_info_n *lfi_n, |
- const MB_MODE_INFO *mbmi, |
- const loop_filter_thresh **lfi) { |
- const int seg = mbmi->segment_id; |
- const int ref = mbmi->ref_frame[0]; |
- const int mode = lfi_n->mode_lf_lut[mbmi->mode]; |
- const int filter_level = lfi_n->lvl[seg][ref][mode]; |
+static void filter_selectively_vert_row2(PLANE_TYPE plane_type, |
+ uint8_t *s, int pitch, |
+ unsigned int mask_16x16_l, |
+ unsigned int mask_8x8_l, |
+ unsigned int mask_4x4_l, |
+ unsigned int mask_4x4_int_l, |
+ const loop_filter_info_n *lfi_n, |
+ const uint8_t *lfl) { |
+ const int mask_shift = plane_type ? 4 : 8; |
+ const int mask_cutoff = plane_type ? 0xf : 0xff; |
+ const int lfl_forward = plane_type ? 4 : 8; |
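+ // The y plane packs 8 blocks per mask row and stores its filter levels with a
+ // stride of 8; the uv planes pack 4 per row with a stride of 4. Each call
+ // filters two rows at once: the second row's pixels start at s + 8 * pitch
+ // and its filter levels at lfl + lfl_forward.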
- if (filter_level > 0) { |
- *lfi = &lfi_n->lfthr[filter_level]; |
- return 1; |
- } else { |
- return 0; |
- } |
-} |
- |
-static void filter_selectively_vert(uint8_t *s, int pitch, |
- unsigned int mask_16x16, |
- unsigned int mask_8x8, |
- unsigned int mask_4x4, |
- unsigned int mask_4x4_int, |
- const loop_filter_thresh **p_lfi) { |
+ unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; |
+ unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; |
+ unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; |
+ unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; |
+ unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; |
+ unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; |
+ unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; |
+ unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; |
unsigned int mask; |
- for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; |
- mask; mask >>= 1) { |
- const loop_filter_thresh *lfi = *p_lfi; |
+ for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | |
+ mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; |
+ mask; mask >>= 1) { |
+ const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; |
+ const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); |
+ // TODO(yunqingwang): the count parameter in the loopfilter functions should be removed.
if (mask & 1) { |
- if (mask_16x16 & 1) { |
- vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim, |
- lfi->hev_thr); |
- assert(!(mask_8x8 & 1)); |
- assert(!(mask_4x4 & 1)); |
- assert(!(mask_4x4_int & 1)); |
- } else if (mask_8x8 & 1) { |
- vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, |
- lfi->hev_thr, 1); |
- assert(!(mask_16x16 & 1)); |
- assert(!(mask_4x4 & 1)); |
- } else if (mask_4x4 & 1) { |
- vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, |
- lfi->hev_thr, 1); |
- assert(!(mask_16x16 & 1)); |
- assert(!(mask_8x8 & 1)); |
+ if ((mask_16x16_0 | mask_16x16_1) & 1) { |
+ if ((mask_16x16_0 & mask_16x16_1) & 1) { |
+ vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr); |
+ } else if (mask_16x16_0 & 1) { |
+ vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr); |
+ } else { |
+ vp9_mb_lpf_vertical_edge_w(s + 8 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr); |
+ } |
} |
+ |
+ if ((mask_8x8_0 | mask_8x8_1) & 1) { |
+ if ((mask_8x8_0 & mask_8x8_1) & 1) { |
+ vp9_mbloop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, lfi1->mblim, |
+ lfi1->lim, lfi1->hev_thr); |
+ } else if (mask_8x8_0 & 1) { |
+ vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, 1); |
+ } else { |
+ vp9_mbloop_filter_vertical_edge(s + 8 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, 1); |
+ } |
+ } |
+ |
+ if ((mask_4x4_0 | mask_4x4_1) & 1) { |
+ if ((mask_4x4_0 & mask_4x4_1) & 1) { |
+ vp9_loop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, lfi1->mblim, |
+ lfi1->lim, lfi1->hev_thr); |
+ } else if (mask_4x4_0 & 1) { |
+ vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, 1); |
+ } else { |
+ vp9_loop_filter_vertical_edge(s + 8 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, 1); |
+ } |
+ } |
+ |
+ if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { |
+ if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { |
+ vp9_loop_filter_vertical_edge_16(s + 4, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, lfi1->mblim, |
+ lfi1->lim, lfi1->hev_thr); |
+ } else if (mask_4x4_int_0 & 1) { |
+ vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim, |
+ lfi0->hev_thr, 1); |
+ } else { |
+ vp9_loop_filter_vertical_edge(s + 8 * pitch + 4, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, 1); |
+ } |
+ } |
} |
- if (mask_4x4_int & 1) |
- vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim, |
- lfi->hev_thr, 1); |
+ |
s += 8; |
- p_lfi++; |
- mask_16x16 >>= 1; |
- mask_8x8 >>= 1; |
- mask_4x4 >>= 1; |
- mask_4x4_int >>= 1; |
+ lfl += 1; |
+ mask_16x16_0 >>= 1; |
+ mask_8x8_0 >>= 1; |
+ mask_4x4_0 >>= 1; |
+ mask_4x4_int_0 >>= 1; |
+ mask_16x16_1 >>= 1; |
+ mask_8x8_1 >>= 1; |
+ mask_4x4_1 >>= 1; |
+ mask_4x4_int_1 >>= 1; |
} |
} |
@@ -386,49 +427,98 @@ |
unsigned int mask_8x8, |
unsigned int mask_4x4, |
unsigned int mask_4x4_int, |
- int only_4x4_1, |
- const loop_filter_thresh **p_lfi) { |
+ const loop_filter_info_n *lfi_n, |
+ const uint8_t *lfl) { |
unsigned int mask; |
int count; |
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; |
mask; mask >>= count) { |
- const loop_filter_thresh *lfi = *p_lfi; |
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; |
count = 1; |
if (mask & 1) { |
- if (!only_4x4_1) { |
- if (mask_16x16 & 1) { |
- if ((mask_16x16 & 3) == 3) { |
- vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, |
- lfi->hev_thr, 2); |
- count = 2; |
+ if (mask_16x16 & 1) { |
+ if ((mask_16x16 & 3) == 3) { |
+ vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 2); |
+ count = 2; |
+ } else { |
+ vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1); |
+ } |
+ } else if (mask_8x8 & 1) { |
+ if ((mask_8x8 & 3) == 3) { |
+ // Next block's thresholds |
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); |
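+ // Two adjacent blocks both use an 8x8 transform here, so the 16-wide variant
+ // filters both 8-pixel edges in one call, each side with its own thresholds;
+ // count = 2 then advances past both columns.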
+ |
+ vp9_mbloop_filter_horizontal_edge_16(s, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr); |
+ |
+ if ((mask_4x4_int & 3) == 3) { |
+ vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr); |
} else { |
- vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, |
- lfi->hev_thr, 1); |
+ if (mask_4x4_int & 1) |
+ vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1); |
+ else if (mask_4x4_int & 2) |
+ vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr, 1); |
} |
- assert(!(mask_8x8 & 1)); |
- assert(!(mask_4x4 & 1)); |
- assert(!(mask_4x4_int & 1)); |
- } else if (mask_8x8 & 1) { |
+ count = 2; |
+ } else { |
vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, |
lfi->hev_thr, 1); |
- assert(!(mask_16x16 & 1)); |
- assert(!(mask_4x4 & 1)); |
- } else if (mask_4x4 & 1) { |
- vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, |
- lfi->hev_thr, 1); |
- assert(!(mask_16x16 & 1)); |
- assert(!(mask_8x8 & 1)); |
+ |
+ if (mask_4x4_int & 1) |
+ vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1); |
} |
- } |
+ } else if (mask_4x4 & 1) { |
+ if ((mask_4x4 & 3) == 3) { |
+ // Next block's thresholds |
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); |
- if (mask_4x4_int & 1) |
+ vp9_loop_filter_horizontal_edge_16(s, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr); |
+ if ((mask_4x4_int & 3) == 3) { |
+ vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr); |
+ } else { |
+ if (mask_4x4_int & 1) |
+ vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1); |
+ else if (mask_4x4_int & 2) |
+ vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, |
+ lfin->mblim, lfin->lim, |
+ lfin->hev_thr, 1); |
+ } |
+ count = 2; |
+ } else { |
+ vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1); |
+ |
+ if (mask_4x4_int & 1) |
+ vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, |
+ lfi->lim, lfi->hev_thr, 1); |
+ } |
+ } else if (mask_4x4_int & 1) { |
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, |
lfi->lim, lfi->hev_thr, 1); |
+ } |
} |
s += 8 * count; |
- p_lfi += count; |
+ lfl += count; |
mask_16x16 >>= count; |
mask_8x8 >>= count; |
mask_4x4 >>= count; |
@@ -461,10 +551,20 @@ |
uint16_t *left_uv = &lfm->left_uv[tx_size_uv]; |
uint16_t *above_uv = &lfm->above_uv[tx_size_uv]; |
uint16_t *int_4x4_uv = &lfm->int_4x4_uv; |
+ int i; |
+ int w = num_8x8_blocks_wide_lookup[block_size]; |
+ int h = num_8x8_blocks_high_lookup[block_size]; |
// If filter level is 0 we don't loop filter. |
- if (!filter_level) |
+ if (!filter_level) { |
return; |
+ } else { |
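+ // Record the filter level for every 8x8 block covered by this prediction
+ // block; lfl_y is laid out row-major with a stride of 8 entries per mi row.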
+ int index = shift_y; |
+ for (i = 0; i < h; i++) { |
+ vpx_memset(&lfm->lfl_y[index], filter_level, w); |
+ index += 8; |
+ } |
+ } |
// These set 1 in the current block size for the block size edges. |
// For instance if the block size is 32x16, we'll set : |
@@ -530,9 +630,19 @@ |
uint64_t *left_y = &lfm->left_y[tx_size_y]; |
uint64_t *above_y = &lfm->above_y[tx_size_y]; |
uint64_t *int_4x4_y = &lfm->int_4x4_y; |
+ int i; |
+ int w = num_8x8_blocks_wide_lookup[block_size]; |
+ int h = num_8x8_blocks_high_lookup[block_size]; |
- if (!filter_level) |
+ if (!filter_level) { |
return; |
+ } else { |
+ int index = shift_y; |
+ for (i = 0; i < h; i++) { |
+ vpx_memset(&lfm->lfl_y[index], filter_level, w); |
+ index += 8; |
+ } |
+ } |
*above_y |= above_prediction_mask[block_size] << shift_y; |
*left_y |= left_prediction_mask[block_size] << shift_y; |
@@ -784,8 +894,74 @@ |
lfm->left_uv[i] &= 0xeeee; |
} |
} |
+ |
+ // Assert if we try to apply 2 different loop filters at the same position. |
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); |
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); |
+ assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); |
+ assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); |
+ assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
+ assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); |
+ assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); |
+ assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); |
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); |
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); |
+ assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); |
+ assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); |
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); |
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); |
+ assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); |
+ assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); |
} |
+ |
#if CONFIG_NON420 |
+static uint8_t build_lfi(const loop_filter_info_n *lfi_n, |
+ const MB_MODE_INFO *mbmi) { |
+ const int seg = mbmi->segment_id; |
+ const int ref = mbmi->ref_frame[0]; |
+ const int mode = lfi_n->mode_lf_lut[mbmi->mode]; |
+ const int filter_level = lfi_n->lvl[seg][ref][mode]; |
+ |
+ return filter_level; |
+} |
+ |
+static void filter_selectively_vert(uint8_t *s, int pitch, |
+ unsigned int mask_16x16, |
+ unsigned int mask_8x8, |
+ unsigned int mask_4x4, |
+ unsigned int mask_4x4_int, |
+ const loop_filter_info_n *lfi_n, |
+ const uint8_t *lfl) { |
+ unsigned int mask; |
+ |
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; |
+ mask; mask >>= 1) { |
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; |
+ |
+ if (mask & 1) { |
+ if (mask_16x16 & 1) { |
+ vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr); |
+ } else if (mask_8x8 & 1) { |
+ vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1); |
+ } else if (mask_4x4 & 1) { |
+ vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1); |
+ } |
+ } |
+ if (mask_4x4_int & 1) |
+ vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim, |
+ lfi->hev_thr, 1); |
+ s += 8; |
+ lfl += 1; |
+ mask_16x16 >>= 1; |
+ mask_8x8 >>= 1; |
+ mask_4x4 >>= 1; |
+ mask_4x4_int >>= 1; |
+ } |
+} |
+ |
static void filter_block_plane_non420(VP9_COMMON *cm, |
struct macroblockd_plane *plane, |
MODE_INFO **mi_8x8, |
@@ -801,7 +977,7 @@ |
unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; |
unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; |
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; |
- const loop_filter_thresh *lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; |
+ uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; |
int r, c; |
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { |
@@ -830,7 +1006,8 @@ |
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; |
// Filter level can vary per MI |
- if (!build_lfi(&cm->lf_info, &mi[0].mbmi, &lfi[r][c >> ss_x])) |
+ if (!(lfl[(r << 3) + (c >> ss_x)] = |
+ build_lfi(&cm->lf_info, &mi[0].mbmi))) |
continue; |
// Build masks based on the transform size of each block |
@@ -887,7 +1064,8 @@ |
mask_16x16_c & border_mask, |
mask_8x8_c & border_mask, |
mask_4x4_c & border_mask, |
- mask_4x4_int[r], lfi[r]); |
+ mask_4x4_int[r], |
+ &cm->lf_info, &lfl[r << 3]); |
dst->buf += 8 * dst->stride; |
mi_8x8 += row_step_stride; |
} |
@@ -898,11 +1076,26 @@ |
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; |
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r]; |
+ unsigned int mask_16x16_r; |
+ unsigned int mask_8x8_r; |
+ unsigned int mask_4x4_r; |
+ |
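+ // The top row of the frame has no row above it, so clear the above-edge
+ // masks there; this replaces the old only_4x4_1 argument.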
+ if (mi_row + r == 0) { |
+ mask_16x16_r = 0; |
+ mask_8x8_r = 0; |
+ mask_4x4_r = 0; |
+ } else { |
+ mask_16x16_r = mask_16x16[r]; |
+ mask_8x8_r = mask_8x8[r]; |
+ mask_4x4_r = mask_4x4[r]; |
+ } |
+ |
filter_selectively_horiz(dst->buf, dst->stride, |
- mask_16x16[r], |
- mask_8x8[r], |
- mask_4x4[r], |
- mask_4x4_int_r, mi_row + r == 0, lfi[r]); |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int_r, |
+ &cm->lf_info, &lfl[r << 3]); |
dst->buf += 8 * dst->stride; |
} |
} |
@@ -910,81 +1103,154 @@ |
static void filter_block_plane(VP9_COMMON *const cm, |
struct macroblockd_plane *const plane, |
- MODE_INFO **mi_8x8, |
- int mi_row, int mi_col, |
+ int mi_row, |
LOOP_FILTER_MASK *lfm) { |
- const int ss_x = plane->subsampling_x; |
- const int ss_y = plane->subsampling_y; |
- const int row_step = 1 << ss_x; |
- const int col_step = 1 << ss_y; |
- const int row_step_stride = cm->mode_info_stride * row_step; |
struct buf_2d *const dst = &plane->dst; |
uint8_t* const dst0 = dst->buf; |
- unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; |
- const loop_filter_thresh *lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; |
int r, c; |
- int row_shift = 3 - ss_x; |
- int row_mask = 0xff >> (ss_x << 2); |
-#define MASK_ROW(value) ((value >> (r_sampled << row_shift)) & row_mask) |
+ if (!plane->plane_type) { |
+ uint64_t mask_16x16 = lfm->left_y[TX_16X16]; |
+ uint64_t mask_8x8 = lfm->left_y[TX_8X8]; |
+ uint64_t mask_4x4 = lfm->left_y[TX_4X4]; |
+ uint64_t mask_4x4_int = lfm->int_4x4_y; |
- for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { |
- int r_sampled = r >> ss_x; |
+ // Vertical pass: do 2 rows at one time |
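+ // Each 64-bit left_y mask holds one bit per 8x8 block (8 bits per row of the
+ // 64x64 superblock), so the low 16 bits cover the two rows done per iteration.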
+ for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { |
+ unsigned int mask_16x16_l = mask_16x16 & 0xffff; |
+ unsigned int mask_8x8_l = mask_8x8 & 0xffff; |
+ unsigned int mask_4x4_l = mask_4x4 & 0xffff; |
+ unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; |
- // Determine the vertical edges that need filtering |
- for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { |
- const MODE_INFO *mi = mi_8x8[c]; |
+ // Disable filtering on the leftmost column |
+ filter_selectively_vert_row2(plane->plane_type, |
+ dst->buf, dst->stride, |
+ mask_16x16_l, |
+ mask_8x8_l, |
+ mask_4x4_l, |
+ mask_4x4_int_l, |
+ &cm->lf_info, &lfm->lfl_y[r << 3]); |
- build_lfi(&cm->lf_info, &mi[0].mbmi, &lfi[r][c >> ss_x]); |
+ dst->buf += 16 * dst->stride; |
+ mask_16x16 >>= 16; |
+ mask_8x8 >>= 16; |
+ mask_4x4 >>= 16; |
+ mask_4x4_int >>= 16; |
} |
- if (!plane->plane_type) { |
- mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_y); |
- // Disable filtering on the leftmost column |
- filter_selectively_vert(dst->buf, dst->stride, |
- MASK_ROW(lfm->left_y[TX_16X16]), |
- MASK_ROW(lfm->left_y[TX_8X8]), |
- MASK_ROW(lfm->left_y[TX_4X4]), |
- MASK_ROW(lfm->int_4x4_y), |
- lfi[r]); |
- } else { |
- mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_uv); |
- // Disable filtering on the leftmost column |
- filter_selectively_vert(dst->buf, dst->stride, |
- MASK_ROW(lfm->left_uv[TX_16X16]), |
- MASK_ROW(lfm->left_uv[TX_8X8]), |
- MASK_ROW(lfm->left_uv[TX_4X4]), |
- MASK_ROW(lfm->int_4x4_uv), |
- lfi[r]); |
+ |
+ // Horizontal pass |
+ dst->buf = dst0; |
+ mask_16x16 = lfm->above_y[TX_16X16]; |
+ mask_8x8 = lfm->above_y[TX_8X8]; |
+ mask_4x4 = lfm->above_y[TX_4X4]; |
+ mask_4x4_int = lfm->int_4x4_y; |
+ |
+ for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { |
+ unsigned int mask_16x16_r; |
+ unsigned int mask_8x8_r; |
+ unsigned int mask_4x4_r; |
+ |
+ if (mi_row + r == 0) { |
+ mask_16x16_r = 0; |
+ mask_8x8_r = 0; |
+ mask_4x4_r = 0; |
+ } else { |
+ mask_16x16_r = mask_16x16 & 0xff; |
+ mask_8x8_r = mask_8x8 & 0xff; |
+ mask_4x4_r = mask_4x4 & 0xff; |
+ } |
+ |
+ filter_selectively_horiz(dst->buf, dst->stride, |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
+ mask_4x4_int & 0xff, |
+ &cm->lf_info, &lfm->lfl_y[r << 3]); |
+ |
+ dst->buf += 8 * dst->stride; |
+ mask_16x16 >>= 8; |
+ mask_8x8 >>= 8; |
+ mask_4x4 >>= 8; |
+ mask_4x4_int >>= 8; |
} |
- dst->buf += 8 * dst->stride; |
- mi_8x8 += row_step_stride; |
- } |
+ } else { |
+ uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; |
+ uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; |
+ uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; |
+ uint16_t mask_4x4_int = lfm->int_4x4_uv; |
- // Now do horizontal pass |
- dst->buf = dst0; |
- for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { |
- const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; |
- const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r]; |
- int r_sampled = r >> ss_x; |
+ // Vertical pass: do 2 rows at one time |
+ for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { |
+ if (plane->plane_type == 1) { |
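+ // Take the chroma filter levels from the co-located luma blocks: with 4:2:0
+ // subsampling each uv 8x8 block spans a 2x2 group of y blocks, so every
+ // second row and column of lfl_y is used.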
+ for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { |
+ lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; |
+ lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + |
+ (c << 1)]; |
+ } |
+ } |
- if (!plane->plane_type) { |
+ { |
+ unsigned int mask_16x16_l = mask_16x16 & 0xff; |
+ unsigned int mask_8x8_l = mask_8x8 & 0xff; |
+ unsigned int mask_4x4_l = mask_4x4 & 0xff; |
+ unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; |
+ |
+ // Disable filtering on the leftmost column |
+ filter_selectively_vert_row2(plane->plane_type, |
+ dst->buf, dst->stride, |
+ mask_16x16_l, |
+ mask_8x8_l, |
+ mask_4x4_l, |
+ mask_4x4_int_l, |
+ &cm->lf_info, &lfm->lfl_uv[r << 1]); |
+ |
+ dst->buf += 16 * dst->stride; |
+ mask_16x16 >>= 8; |
+ mask_8x8 >>= 8; |
+ mask_4x4 >>= 8; |
+ mask_4x4_int >>= 8; |
+ } |
+ } |
+ |
+ // Horizontal pass |
+ dst->buf = dst0; |
+ mask_16x16 = lfm->above_uv[TX_16X16]; |
+ mask_8x8 = lfm->above_uv[TX_8X8]; |
+ mask_4x4 = lfm->above_uv[TX_4X4]; |
+ mask_4x4_int = lfm->int_4x4_uv; |
+ |
+ for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { |
+ const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; |
+ const unsigned int mask_4x4_int_r = skip_border_4x4_r ? |
+ 0 : (mask_4x4_int & 0xf); |
+ unsigned int mask_16x16_r; |
+ unsigned int mask_8x8_r; |
+ unsigned int mask_4x4_r; |
+ |
+ if (mi_row + r == 0) { |
+ mask_16x16_r = 0; |
+ mask_8x8_r = 0; |
+ mask_4x4_r = 0; |
+ } else { |
+ mask_16x16_r = mask_16x16 & 0xf; |
+ mask_8x8_r = mask_8x8 & 0xf; |
+ mask_4x4_r = mask_4x4 & 0xf; |
+ } |
+ |
filter_selectively_horiz(dst->buf, dst->stride, |
- MASK_ROW(lfm->above_y[TX_16X16]), |
- MASK_ROW(lfm->above_y[TX_8X8]), |
- MASK_ROW(lfm->above_y[TX_4X4]), |
- MASK_ROW(lfm->int_4x4_y), |
- mi_row + r == 0, lfi[r]); |
- } else { |
- filter_selectively_horiz(dst->buf, dst->stride, |
- MASK_ROW(lfm->above_uv[TX_16X16]), |
- MASK_ROW(lfm->above_uv[TX_8X8]), |
- MASK_ROW(lfm->above_uv[TX_4X4]), |
+ mask_16x16_r, |
+ mask_8x8_r, |
+ mask_4x4_r, |
mask_4x4_int_r, |
- mi_row + r == 0, lfi[r]); |
+ &cm->lf_info, &lfm->lfl_uv[r << 1]); |
+ |
+ dst->buf += 8 * dst->stride; |
+ mask_16x16 >>= 4; |
+ mask_8x8 >>= 4; |
+ mask_4x4 >>= 4; |
+ mask_4x4_int >>= 4; |
} |
- dst->buf += 8 * dst->stride; |
} |
-#undef MASK_ROW |
} |
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, |
@@ -1017,8 +1283,7 @@ |
#if CONFIG_NON420 |
if (use_420) |
#endif |
- filter_block_plane(cm, &xd->plane[plane], mi_8x8 + mi_col, mi_row, |
- mi_col, &lfm); |
+ filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); |
#if CONFIG_NON420 |
else |
filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col, |