| Index: source/libvpx/vp9/common/vp9_loopfilter.c | 
| =================================================================== | 
| --- source/libvpx/vp9/common/vp9_loopfilter.c	(revision 240950) | 
| +++ source/libvpx/vp9/common/vp9_loopfilter.c	(working copy) | 
| @@ -32,6 +32,8 @@ | 
| uint16_t left_uv[TX_SIZES]; | 
| uint16_t above_uv[TX_SIZES]; | 
| uint16_t int_4x4_uv; | 
| +  uint8_t lfl_y[64]; | 
| +  uint8_t lfl_uv[16]; | 
| } LOOP_FILTER_MASK; | 
|  | 
| // 64 bit masks for left transform size.  Each 1 represents a position where | 
| @@ -281,10 +283,10 @@ | 
| -  // n_shift is the a multiplier for lf_deltas | 
| +  // scale is the multiplier for lf_deltas | 
| // the multiplier is 1 for when filter_lvl is between 0 and 31; | 
| // 2 when filter_lvl is between 32 and 63 | 
| -  const int n_shift = default_filt_lvl >> 5; | 
| +  const int scale = 1 << (default_filt_lvl >> 5); | 
| loop_filter_info_n *const lfi = &cm->lf_info; | 
| struct loopfilter *const lf = &cm->lf; | 
| -  struct segmentation *const seg = &cm->seg; | 
| +  const struct segmentation *const seg = &cm->seg; | 
|  | 
| // update limits if sharpness has changed | 
| if (lf->last_sharpness_level != lf->sharpness_level) { | 
| @@ -293,9 +295,7 @@ | 
| } | 
|  | 
| for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { | 
| -    int lvl_seg = default_filt_lvl, ref, mode, intra_lvl; | 
| - | 
| -    // Set the baseline filter values for each segment | 
| +    int lvl_seg = default_filt_lvl; | 
| if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { | 
| const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); | 
| lvl_seg = seg->abs_delta == SEGMENT_ABSDATA | 
| @@ -307,77 +307,118 @@ | 
| // we could get rid of this if we assume that deltas are set to | 
| // zero when not in use; encoder always uses deltas | 
| vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); | 
| -      continue; | 
| -    } | 
| +    } else { | 
| +      int ref, mode; | 
| +      const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; | 
| +      lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); | 
|  | 
| -    intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * (1 << n_shift); | 
| -    lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); | 
| - | 
| -    for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) | 
| -      for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { | 
| -        const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * (1 << n_shift) | 
| -                                      + lf->mode_deltas[mode] * (1 << n_shift); | 
| -        lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); | 
| +      for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { | 
| +        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { | 
| +          const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale | 
| +                                        + lf->mode_deltas[mode] * scale; | 
| +          lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); | 
| +        } | 
| } | 
| +    } | 
| } | 
| } | 
|  | 
| -static int build_lfi(const loop_filter_info_n *lfi_n, | 
| -                     const MB_MODE_INFO *mbmi, | 
| -                     const loop_filter_thresh **lfi) { | 
| -  const int seg = mbmi->segment_id; | 
| -  const int ref = mbmi->ref_frame[0]; | 
| -  const int mode = lfi_n->mode_lf_lut[mbmi->mode]; | 
| -  const int filter_level = lfi_n->lvl[seg][ref][mode]; | 
| +static void filter_selectively_vert_row2(PLANE_TYPE plane_type, | 
| +                                         uint8_t *s, int pitch, | 
| +                                         unsigned int mask_16x16_l, | 
| +                                         unsigned int mask_8x8_l, | 
| +                                         unsigned int mask_4x4_l, | 
| +                                         unsigned int mask_4x4_int_l, | 
| +                                         const loop_filter_info_n *lfi_n, | 
| +                                         const uint8_t *lfl) { | 
| +  const int mask_shift = plane_type ? 4 : 8; | 
| +  const int mask_cutoff = plane_type ? 0xf : 0xff; | 
| +  const int lfl_forward = plane_type ? 4 : 8; | 
|  | 
| -  if (filter_level > 0) { | 
| -    *lfi = &lfi_n->lfthr[filter_level]; | 
| -    return 1; | 
| -  } else { | 
| -    return 0; | 
| -  } | 
| -} | 
| - | 
| -static void filter_selectively_vert(uint8_t *s, int pitch, | 
| -                                    unsigned int mask_16x16, | 
| -                                    unsigned int mask_8x8, | 
| -                                    unsigned int mask_4x4, | 
| -                                    unsigned int mask_4x4_int, | 
| -                                    const loop_filter_thresh **p_lfi) { | 
| +  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; | 
| +  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; | 
| +  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; | 
| +  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; | 
| +  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; | 
| +  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; | 
| +  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; | 
| +  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; | 
| unsigned int mask; | 
|  | 
| -  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; | 
| -       mask; mask >>= 1) { | 
| -    const loop_filter_thresh *lfi = *p_lfi; | 
| +  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | | 
| +      mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; | 
| +      mask; mask >>= 1) { | 
| +    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; | 
| +    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); | 
|  | 
| +    // TODO(yunqingwang): count in loopfilter functions should be removed. | 
| if (mask & 1) { | 
| -      if (mask_16x16 & 1) { | 
| -        vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim, | 
| -                                   lfi->hev_thr); | 
| -        assert(!(mask_8x8 & 1)); | 
| -        assert(!(mask_4x4 & 1)); | 
| -        assert(!(mask_4x4_int & 1)); | 
| -      } else if (mask_8x8 & 1) { | 
| -        vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, | 
| -                                        lfi->hev_thr, 1); | 
| -        assert(!(mask_16x16 & 1)); | 
| -        assert(!(mask_4x4 & 1)); | 
| -      } else if (mask_4x4 & 1) { | 
| -        vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, | 
| -                                      lfi->hev_thr, 1); | 
| -        assert(!(mask_16x16 & 1)); | 
| -        assert(!(mask_8x8 & 1)); | 
| +      if ((mask_16x16_0 | mask_16x16_1) & 1) { | 
| +        if ((mask_16x16_0 & mask_16x16_1) & 1) { | 
| +          vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim, | 
| +                                     lfi0->hev_thr); | 
| +        } else if (mask_16x16_0 & 1) { | 
| +          vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim, | 
| +                                     lfi0->hev_thr); | 
| +        } else { | 
| +          vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim, | 
| +                                     lfi1->lim, lfi1->hev_thr); | 
| +        } | 
| } | 
| + | 
| +      if ((mask_8x8_0 | mask_8x8_1) & 1) { | 
| +        if ((mask_8x8_0 & mask_8x8_1) & 1) { | 
| +          vp9_mbloop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, | 
| +                                          lfi0->hev_thr, lfi1->mblim, | 
| +                                          lfi1->lim, lfi1->hev_thr); | 
| +        } else if (mask_8x8_0 & 1) { | 
| +          vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, | 
| +                                          lfi0->hev_thr, 1); | 
| +        } else { | 
| +          vp9_mbloop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim, | 
| +                                          lfi1->lim, lfi1->hev_thr, 1); | 
| +        } | 
| +      } | 
| + | 
| +      if ((mask_4x4_0 | mask_4x4_1) & 1) { | 
| +        if ((mask_4x4_0 & mask_4x4_1) & 1) { | 
| +          vp9_loop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, | 
| +                                        lfi0->hev_thr, lfi1->mblim, | 
| +                                        lfi1->lim, lfi1->hev_thr); | 
| +        } else if (mask_4x4_0 & 1) { | 
| +          vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, | 
| +                                        lfi0->hev_thr, 1); | 
| +        } else { | 
| +          vp9_loop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim, | 
| +                                        lfi1->lim, lfi1->hev_thr, 1); | 
| +        } | 
| +      } | 
| + | 
| +      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { | 
| +        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { | 
| +          vp9_loop_filter_vertical_edge_16(s + 4, pitch, lfi0->mblim, lfi0->lim, | 
| +                                        lfi0->hev_thr, lfi1->mblim, | 
| +                                        lfi1->lim, lfi1->hev_thr); | 
| +        } else if (mask_4x4_int_0 & 1) { | 
| +          vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim, | 
| +                                        lfi0->hev_thr, 1); | 
| +        } else { | 
| +          vp9_loop_filter_vertical_edge(s + 8 *pitch + 4, pitch, lfi1->mblim, | 
| +                                        lfi1->lim, lfi1->hev_thr, 1); | 
| +        } | 
| +      } | 
| } | 
| -    if (mask_4x4_int & 1) | 
| -      vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim, | 
| -                                    lfi->hev_thr, 1); | 
| + | 
| s += 8; | 
| -    p_lfi++; | 
| -    mask_16x16 >>= 1; | 
| -    mask_8x8 >>= 1; | 
| -    mask_4x4 >>= 1; | 
| -    mask_4x4_int >>= 1; | 
| +    lfl += 1; | 
| +    mask_16x16_0 >>= 1; | 
| +    mask_8x8_0 >>= 1; | 
| +    mask_4x4_0 >>= 1; | 
| +    mask_4x4_int_0 >>= 1; | 
| +    mask_16x16_1 >>= 1; | 
| +    mask_8x8_1 >>= 1; | 
| +    mask_4x4_1 >>= 1; | 
| +    mask_4x4_int_1 >>= 1; | 
| } | 
| } | 
|  | 
| @@ -386,49 +427,98 @@ | 
| unsigned int mask_8x8, | 
| unsigned int mask_4x4, | 
| unsigned int mask_4x4_int, | 
| -                                     int only_4x4_1, | 
| -                                     const loop_filter_thresh **p_lfi) { | 
| +                                     const loop_filter_info_n *lfi_n, | 
| +                                     const uint8_t *lfl) { | 
| unsigned int mask; | 
| int count; | 
|  | 
| for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; | 
| mask; mask >>= count) { | 
| -    const loop_filter_thresh *lfi = *p_lfi; | 
| +    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; | 
|  | 
| count = 1; | 
| if (mask & 1) { | 
| -      if (!only_4x4_1) { | 
| -        if (mask_16x16 & 1) { | 
| -          if ((mask_16x16 & 3) == 3) { | 
| -            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, | 
| -                                         lfi->hev_thr, 2); | 
| -            count = 2; | 
| +      if (mask_16x16 & 1) { | 
| +        if ((mask_16x16 & 3) == 3) { | 
| +          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, | 
| +                                       lfi->hev_thr, 2); | 
| +          count = 2; | 
| +        } else { | 
| +          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, | 
| +                                       lfi->hev_thr, 1); | 
| +        } | 
| +      } else if (mask_8x8 & 1) { | 
| +        if ((mask_8x8 & 3) == 3) { | 
| +          // Next block's thresholds | 
| +          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); | 
| + | 
| +          vp9_mbloop_filter_horizontal_edge_16(s, pitch, lfi->mblim, | 
| +                                               lfi->lim, lfi->hev_thr, | 
| +                                               lfin->mblim, lfin->lim, | 
| +                                               lfin->hev_thr); | 
| + | 
| +          if ((mask_4x4_int & 3) == 3) { | 
| +            vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim, | 
| +                                               lfi->lim, lfi->hev_thr, | 
| +                                               lfin->mblim, lfin->lim, | 
| +                                               lfin->hev_thr); | 
| } else { | 
| -            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, | 
| -                                         lfi->hev_thr, 1); | 
| +            if (mask_4x4_int & 1) | 
| +              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, | 
| +                                              lfi->lim, lfi->hev_thr, 1); | 
| +            else if (mask_4x4_int & 2) | 
| +              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, | 
| +                                              lfin->mblim, lfin->lim, | 
| +                                              lfin->hev_thr, 1); | 
| } | 
| -          assert(!(mask_8x8 & 1)); | 
| -          assert(!(mask_4x4 & 1)); | 
| -          assert(!(mask_4x4_int & 1)); | 
| -        } else if (mask_8x8 & 1) { | 
| +          count = 2; | 
| +        } else { | 
| vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, | 
| lfi->hev_thr, 1); | 
| -          assert(!(mask_16x16 & 1)); | 
| -          assert(!(mask_4x4 & 1)); | 
| -        } else if (mask_4x4 & 1) { | 
| -          vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, | 
| -                                          lfi->hev_thr, 1); | 
| -          assert(!(mask_16x16 & 1)); | 
| -          assert(!(mask_8x8 & 1)); | 
| + | 
| +          if (mask_4x4_int & 1) | 
| +            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, | 
| +                                            lfi->lim, lfi->hev_thr, 1); | 
| } | 
| -      } | 
| +      } else if (mask_4x4 & 1) { | 
| +        if ((mask_4x4 & 3) == 3) { | 
| +          // Next block's thresholds | 
| +          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); | 
|  | 
| -      if (mask_4x4_int & 1) | 
| +          vp9_loop_filter_horizontal_edge_16(s, pitch, lfi->mblim, | 
| +                                             lfi->lim, lfi->hev_thr, | 
| +                                             lfin->mblim, lfin->lim, | 
| +                                             lfin->hev_thr); | 
| +          if ((mask_4x4_int & 3) == 3) { | 
| +            vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim, | 
| +                                               lfi->lim, lfi->hev_thr, | 
| +                                               lfin->mblim, lfin->lim, | 
| +                                               lfin->hev_thr); | 
| +          } else { | 
| +            if (mask_4x4_int & 1) | 
| +              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, | 
| +                                              lfi->lim, lfi->hev_thr, 1); | 
| +            else if (mask_4x4_int & 2) | 
| +              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, | 
| +                                              lfin->mblim, lfin->lim, | 
| +                                              lfin->hev_thr, 1); | 
| +          } | 
| +          count = 2; | 
| +        } else { | 
| +        vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, | 
| +                                        lfi->hev_thr, 1); | 
| + | 
| +        if (mask_4x4_int & 1) | 
| +          vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, | 
| +                                          lfi->lim, lfi->hev_thr, 1); | 
| +        } | 
| +      } else if (mask_4x4_int & 1) { | 
| vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, | 
| lfi->lim, lfi->hev_thr, 1); | 
| +      } | 
| } | 
| s += 8 * count; | 
| -    p_lfi += count; | 
| +    lfl += count; | 
| mask_16x16 >>= count; | 
| mask_8x8 >>= count; | 
| mask_4x4 >>= count; | 
| @@ -461,10 +551,20 @@ | 
| uint16_t *left_uv = &lfm->left_uv[tx_size_uv]; | 
| uint16_t *above_uv = &lfm->above_uv[tx_size_uv]; | 
| uint16_t *int_4x4_uv = &lfm->int_4x4_uv; | 
| +  int i; | 
| +  int w = num_8x8_blocks_wide_lookup[block_size]; | 
| +  int h = num_8x8_blocks_high_lookup[block_size]; | 
|  | 
| // If filter level is 0 we don't loop filter. | 
| -  if (!filter_level) | 
| +  if (!filter_level) { | 
| return; | 
| +  } else { | 
| +    int index = shift_y; | 
| +    for (i = 0; i < h; i++) { | 
| +      vpx_memset(&lfm->lfl_y[index], filter_level, w); | 
| +      index += 8; | 
| +    } | 
| +  } | 
|  | 
| // These set 1 in the current block size for the block size edges. | 
| // For instance if the block size is 32x16,   we'll set : | 
| @@ -530,9 +630,19 @@ | 
| uint64_t *left_y = &lfm->left_y[tx_size_y]; | 
| uint64_t *above_y = &lfm->above_y[tx_size_y]; | 
| uint64_t *int_4x4_y = &lfm->int_4x4_y; | 
| +  int i; | 
| +  int w = num_8x8_blocks_wide_lookup[block_size]; | 
| +  int h = num_8x8_blocks_high_lookup[block_size]; | 
|  | 
| -  if (!filter_level) | 
| +  if (!filter_level) { | 
| return; | 
| +  } else { | 
| +    int index = shift_y; | 
| +    for (i = 0; i < h; i++) { | 
| +      vpx_memset(&lfm->lfl_y[index], filter_level, w); | 
| +      index += 8; | 
| +    } | 
| +  } | 
|  | 
| *above_y |= above_prediction_mask[block_size] << shift_y; | 
| *left_y |= left_prediction_mask[block_size] << shift_y; | 
| @@ -784,8 +894,74 @@ | 
| lfm->left_uv[i] &= 0xeeee; | 
| } | 
| } | 
| + | 
| +  // Assert if we try to apply 2 different loop filters at the same position. | 
| +  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); | 
| +  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); | 
| +  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); | 
| +  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); | 
| +  assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); | 
| +  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); | 
| +  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); | 
| +  assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); | 
| +  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); | 
| +  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); | 
| +  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); | 
| +  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); | 
| +  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); | 
| +  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); | 
| +  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); | 
| +  assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); | 
| } | 
| + | 
| #if CONFIG_NON420 | 
| +static uint8_t build_lfi(const loop_filter_info_n *lfi_n, | 
| +                     const MB_MODE_INFO *mbmi) { | 
| +  const int seg = mbmi->segment_id; | 
| +  const int ref = mbmi->ref_frame[0]; | 
| +  const int mode = lfi_n->mode_lf_lut[mbmi->mode]; | 
| +  const int filter_level = lfi_n->lvl[seg][ref][mode]; | 
| + | 
| +  return filter_level; | 
| +} | 
| + | 
| +static void filter_selectively_vert(uint8_t *s, int pitch, | 
| +                                    unsigned int mask_16x16, | 
| +                                    unsigned int mask_8x8, | 
| +                                    unsigned int mask_4x4, | 
| +                                    unsigned int mask_4x4_int, | 
| +                                    const loop_filter_info_n *lfi_n, | 
| +                                    const uint8_t *lfl) { | 
| +  unsigned int mask; | 
| + | 
| +  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; | 
| +       mask; mask >>= 1) { | 
| +    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; | 
| + | 
| +    if (mask & 1) { | 
| +      if (mask_16x16 & 1) { | 
| +        vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim, | 
| +                                   lfi->hev_thr); | 
| +      } else if (mask_8x8 & 1) { | 
| +        vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, | 
| +                                        lfi->hev_thr, 1); | 
| +      } else if (mask_4x4 & 1) { | 
| +        vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, | 
| +                                      lfi->hev_thr, 1); | 
| +      } | 
| +    } | 
| +    if (mask_4x4_int & 1) | 
| +      vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim, | 
| +                                    lfi->hev_thr, 1); | 
| +    s += 8; | 
| +    lfl += 1; | 
| +    mask_16x16 >>= 1; | 
| +    mask_8x8 >>= 1; | 
| +    mask_4x4 >>= 1; | 
| +    mask_4x4_int >>= 1; | 
| +  } | 
| +} | 
| + | 
| static void filter_block_plane_non420(VP9_COMMON *cm, | 
| struct macroblockd_plane *plane, | 
| MODE_INFO **mi_8x8, | 
| @@ -801,7 +977,7 @@ | 
| unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; | 
| unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; | 
| unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; | 
| -  const loop_filter_thresh *lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; | 
| +  uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; | 
| int r, c; | 
|  | 
| for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { | 
| @@ -830,7 +1006,8 @@ | 
| const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; | 
|  | 
| // Filter level can vary per MI | 
| -      if (!build_lfi(&cm->lf_info, &mi[0].mbmi, &lfi[r][c >> ss_x])) | 
| +      if (!(lfl[(r << 3) + (c >> ss_x)] = | 
| +          build_lfi(&cm->lf_info, &mi[0].mbmi))) | 
| continue; | 
|  | 
| // Build masks based on the transform size of each block | 
| @@ -887,7 +1064,8 @@ | 
| mask_16x16_c & border_mask, | 
| mask_8x8_c & border_mask, | 
| mask_4x4_c & border_mask, | 
| -                            mask_4x4_int[r], lfi[r]); | 
| +                            mask_4x4_int[r], | 
| +                            &cm->lf_info, &lfl[r << 3]); | 
| dst->buf += 8 * dst->stride; | 
| mi_8x8 += row_step_stride; | 
| } | 
| @@ -898,11 +1076,26 @@ | 
| const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; | 
| const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r]; | 
|  | 
| +    unsigned int mask_16x16_r; | 
| +    unsigned int mask_8x8_r; | 
| +    unsigned int mask_4x4_r; | 
| + | 
| +    if (mi_row + r == 0) { | 
| +      mask_16x16_r = 0; | 
| +      mask_8x8_r = 0; | 
| +      mask_4x4_r = 0; | 
| +    } else { | 
| +      mask_16x16_r = mask_16x16[r]; | 
| +      mask_8x8_r = mask_8x8[r]; | 
| +      mask_4x4_r = mask_4x4[r]; | 
| +    } | 
| + | 
| filter_selectively_horiz(dst->buf, dst->stride, | 
| -                             mask_16x16[r], | 
| -                             mask_8x8[r], | 
| -                             mask_4x4[r], | 
| -                             mask_4x4_int_r, mi_row + r == 0, lfi[r]); | 
| +                             mask_16x16_r, | 
| +                             mask_8x8_r, | 
| +                             mask_4x4_r, | 
| +                             mask_4x4_int_r, | 
| +                             &cm->lf_info, &lfl[r << 3]); | 
| dst->buf += 8 * dst->stride; | 
| } | 
| } | 
| @@ -910,81 +1103,154 @@ | 
|  | 
| static void filter_block_plane(VP9_COMMON *const cm, | 
| struct macroblockd_plane *const plane, | 
| -                               MODE_INFO **mi_8x8, | 
| -                               int mi_row, int mi_col, | 
| +                               int mi_row, | 
| LOOP_FILTER_MASK *lfm) { | 
| -  const int ss_x = plane->subsampling_x; | 
| -  const int ss_y = plane->subsampling_y; | 
| -  const int row_step = 1 << ss_x; | 
| -  const int col_step = 1 << ss_y; | 
| -  const int row_step_stride = cm->mode_info_stride * row_step; | 
| struct buf_2d *const dst = &plane->dst; | 
| uint8_t* const dst0 = dst->buf; | 
| -  unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; | 
| -  const loop_filter_thresh *lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; | 
| int r, c; | 
| -  int row_shift = 3 - ss_x; | 
| -  int row_mask = 0xff >> (ss_x << 2); | 
|  | 
| -#define MASK_ROW(value) ((value >> (r_sampled << row_shift)) & row_mask) | 
| +  if (!plane->plane_type) { | 
| +    uint64_t mask_16x16 = lfm->left_y[TX_16X16]; | 
| +    uint64_t mask_8x8 = lfm->left_y[TX_8X8]; | 
| +    uint64_t mask_4x4 = lfm->left_y[TX_4X4]; | 
| +    uint64_t mask_4x4_int = lfm->int_4x4_y; | 
|  | 
| -  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { | 
| -    int r_sampled = r >> ss_x; | 
| +    // Vertical pass: do 2 rows at one time | 
| +    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { | 
| +      unsigned int mask_16x16_l = mask_16x16 & 0xffff; | 
| +      unsigned int mask_8x8_l = mask_8x8 & 0xffff; | 
| +      unsigned int mask_4x4_l = mask_4x4 & 0xffff; | 
| +      unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; | 
|  | 
| -    // Determine the vertical edges that need filtering | 
| -    for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { | 
| -      const MODE_INFO *mi = mi_8x8[c]; | 
| +      // Disable filtering on the leftmost column | 
| +      filter_selectively_vert_row2(plane->plane_type, | 
| +                                   dst->buf, dst->stride, | 
| +                                   mask_16x16_l, | 
| +                                   mask_8x8_l, | 
| +                                   mask_4x4_l, | 
| +                                   mask_4x4_int_l, | 
| +                                   &cm->lf_info, &lfm->lfl_y[r << 3]); | 
|  | 
| -      build_lfi(&cm->lf_info, &mi[0].mbmi, &lfi[r][c >> ss_x]); | 
| +      dst->buf += 16 * dst->stride; | 
| +      mask_16x16 >>= 16; | 
| +      mask_8x8 >>= 16; | 
| +      mask_4x4 >>= 16; | 
| +      mask_4x4_int >>= 16; | 
| } | 
| -    if (!plane->plane_type) { | 
| -      mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_y); | 
| -      // Disable filtering on the leftmost column | 
| -      filter_selectively_vert(dst->buf, dst->stride, | 
| -                              MASK_ROW(lfm->left_y[TX_16X16]), | 
| -                              MASK_ROW(lfm->left_y[TX_8X8]), | 
| -                              MASK_ROW(lfm->left_y[TX_4X4]), | 
| -                              MASK_ROW(lfm->int_4x4_y), | 
| -                              lfi[r]); | 
| -    } else { | 
| -      mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_uv); | 
| -      // Disable filtering on the leftmost column | 
| -      filter_selectively_vert(dst->buf, dst->stride, | 
| -                              MASK_ROW(lfm->left_uv[TX_16X16]), | 
| -                              MASK_ROW(lfm->left_uv[TX_8X8]), | 
| -                              MASK_ROW(lfm->left_uv[TX_4X4]), | 
| -                              MASK_ROW(lfm->int_4x4_uv), | 
| -                              lfi[r]); | 
| + | 
| +    // Horizontal pass | 
| +    dst->buf = dst0; | 
| +    mask_16x16 = lfm->above_y[TX_16X16]; | 
| +    mask_8x8 = lfm->above_y[TX_8X8]; | 
| +    mask_4x4 = lfm->above_y[TX_4X4]; | 
| +    mask_4x4_int = lfm->int_4x4_y; | 
| + | 
| +    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { | 
| +      unsigned int mask_16x16_r; | 
| +      unsigned int mask_8x8_r; | 
| +      unsigned int mask_4x4_r; | 
| + | 
| +      if (mi_row + r == 0) { | 
| +        mask_16x16_r = 0; | 
| +        mask_8x8_r = 0; | 
| +        mask_4x4_r = 0; | 
| +      } else { | 
| +        mask_16x16_r = mask_16x16 & 0xff; | 
| +        mask_8x8_r = mask_8x8 & 0xff; | 
| +        mask_4x4_r = mask_4x4 & 0xff; | 
| +      } | 
| + | 
| +      filter_selectively_horiz(dst->buf, dst->stride, | 
| +                               mask_16x16_r, | 
| +                               mask_8x8_r, | 
| +                               mask_4x4_r, | 
| +                               mask_4x4_int & 0xff, | 
| +                               &cm->lf_info, &lfm->lfl_y[r << 3]); | 
| + | 
| +      dst->buf += 8 * dst->stride; | 
| +      mask_16x16 >>= 8; | 
| +      mask_8x8 >>= 8; | 
| +      mask_4x4 >>= 8; | 
| +      mask_4x4_int >>= 8; | 
| } | 
| -    dst->buf += 8 * dst->stride; | 
| -    mi_8x8 += row_step_stride; | 
| -  } | 
| +  } else { | 
| +    uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; | 
| +    uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; | 
| +    uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; | 
| +    uint16_t mask_4x4_int = lfm->int_4x4_uv; | 
|  | 
| -  // Now do horizontal pass | 
| -  dst->buf = dst0; | 
| -  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { | 
| -    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; | 
| -    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r]; | 
| -    int r_sampled = r >> ss_x; | 
| +    // Vertical pass: do 2 chroma rows (4 mi rows) at one time | 
| +    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { | 
| +      if (plane->plane_type == 1) { | 
| +        for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { | 
| +          lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; | 
| +          lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + | 
| +                                                       (c << 1)]; | 
| +        } | 
| +      } | 
|  | 
| -    if (!plane->plane_type) { | 
| +      { | 
| +        unsigned int mask_16x16_l = mask_16x16 & 0xff; | 
| +        unsigned int mask_8x8_l = mask_8x8 & 0xff; | 
| +        unsigned int mask_4x4_l = mask_4x4 & 0xff; | 
| +        unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; | 
| + | 
| +        // Disable filtering on the leftmost column | 
| +        filter_selectively_vert_row2(plane->plane_type, | 
| +                                     dst->buf, dst->stride, | 
| +                                     mask_16x16_l, | 
| +                                     mask_8x8_l, | 
| +                                     mask_4x4_l, | 
| +                                     mask_4x4_int_l, | 
| +                                     &cm->lf_info, &lfm->lfl_uv[r << 1]); | 
| + | 
| +        dst->buf += 16 * dst->stride; | 
| +        mask_16x16 >>= 8; | 
| +        mask_8x8 >>= 8; | 
| +        mask_4x4 >>= 8; | 
| +        mask_4x4_int >>= 8; | 
| +      } | 
| +    } | 
| + | 
| +    // Horizontal pass | 
| +    dst->buf = dst0; | 
| +    mask_16x16 = lfm->above_uv[TX_16X16]; | 
| +    mask_8x8 = lfm->above_uv[TX_8X8]; | 
| +    mask_4x4 = lfm->above_uv[TX_4X4]; | 
| +    mask_4x4_int = lfm->int_4x4_uv; | 
| + | 
| +    for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { | 
| +      const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; | 
| +      const unsigned int mask_4x4_int_r = skip_border_4x4_r ? | 
| +          0 : (mask_4x4_int & 0xf); | 
| +      unsigned int mask_16x16_r; | 
| +      unsigned int mask_8x8_r; | 
| +      unsigned int mask_4x4_r; | 
| + | 
| +      if (mi_row + r == 0) { | 
| +        mask_16x16_r = 0; | 
| +        mask_8x8_r = 0; | 
| +        mask_4x4_r = 0; | 
| +      } else { | 
| +        mask_16x16_r = mask_16x16 & 0xf; | 
| +        mask_8x8_r = mask_8x8 & 0xf; | 
| +        mask_4x4_r = mask_4x4 & 0xf; | 
| +      } | 
| + | 
| filter_selectively_horiz(dst->buf, dst->stride, | 
| -                               MASK_ROW(lfm->above_y[TX_16X16]), | 
| -                               MASK_ROW(lfm->above_y[TX_8X8]), | 
| -                               MASK_ROW(lfm->above_y[TX_4X4]), | 
| -                               MASK_ROW(lfm->int_4x4_y), | 
| -                               mi_row + r == 0, lfi[r]); | 
| -    } else { | 
| -      filter_selectively_horiz(dst->buf, dst->stride, | 
| -                               MASK_ROW(lfm->above_uv[TX_16X16]), | 
| -                               MASK_ROW(lfm->above_uv[TX_8X8]), | 
| -                               MASK_ROW(lfm->above_uv[TX_4X4]), | 
| +                               mask_16x16_r, | 
| +                               mask_8x8_r, | 
| +                               mask_4x4_r, | 
| mask_4x4_int_r, | 
| -                               mi_row + r == 0, lfi[r]); | 
| +                               &cm->lf_info, &lfm->lfl_uv[r << 1]); | 
| + | 
| +      dst->buf += 8 * dst->stride; | 
| +      mask_16x16 >>= 4; | 
| +      mask_8x8 >>= 4; | 
| +      mask_4x4 >>= 4; | 
| +      mask_4x4_int >>= 4; | 
| } | 
| -    dst->buf += 8 * dst->stride; | 
| } | 
| -#undef MASK_ROW | 
| } | 
|  | 
| void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, | 
| @@ -1017,8 +1283,7 @@ | 
| #if CONFIG_NON420 | 
| if (use_420) | 
| #endif | 
| -          filter_block_plane(cm, &xd->plane[plane], mi_8x8 + mi_col, mi_row, | 
| -                             mi_col, &lfm); | 
| +          filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); | 
| #if CONFIG_NON420 | 
| else | 
| filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col, | 
|  |