OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
215 int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); | 215 int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); |
216 | 216 |
217 if (sharpness_lvl > 0) { | 217 if (sharpness_lvl > 0) { |
218 if (block_inside_limit > (9 - sharpness_lvl)) | 218 if (block_inside_limit > (9 - sharpness_lvl)) |
219 block_inside_limit = (9 - sharpness_lvl); | 219 block_inside_limit = (9 - sharpness_lvl); |
220 } | 220 } |
221 | 221 |
222 if (block_inside_limit < 1) | 222 if (block_inside_limit < 1) |
223 block_inside_limit = 1; | 223 block_inside_limit = 1; |
224 | 224 |
225 vpx_memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); | 225 memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); |
226 vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), | 226 memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), |
227 SIMD_WIDTH); | 227 SIMD_WIDTH); |
228 } | 228 } |
229 } | 229 } |
230 | 230 |
231 static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, | 231 static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, |
232 const MB_MODE_INFO *mbmi) { | 232 const MB_MODE_INFO *mbmi) { |
233 return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] | 233 return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] |
234 [mode_lf_lut[mbmi->mode]]; | 234 [mode_lf_lut[mbmi->mode]]; |
235 } | 235 } |
236 | 236 |
237 void vp9_loop_filter_init(VP9_COMMON *cm) { | 237 void vp9_loop_filter_init(VP9_COMMON *cm) { |
238 loop_filter_info_n *lfi = &cm->lf_info; | 238 loop_filter_info_n *lfi = &cm->lf_info; |
239 struct loopfilter *lf = &cm->lf; | 239 struct loopfilter *lf = &cm->lf; |
240 int lvl; | 240 int lvl; |
241 | 241 |
242 // init limits for given sharpness | 242 // init limits for given sharpness |
243 update_sharpness(lfi, lf->sharpness_level); | 243 update_sharpness(lfi, lf->sharpness_level); |
244 lf->last_sharpness_level = lf->sharpness_level; | 244 lf->last_sharpness_level = lf->sharpness_level; |
245 | 245 |
246 // init hev threshold const vectors | 246 // init hev threshold const vectors |
247 for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) | 247 for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) |
248 vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); | 248 memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); |
249 } | 249 } |
250 | 250 |
251 void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { | 251 void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { |
252 int seg_id; | 252 int seg_id; |
253 // n_shift is the multiplier for lf_deltas | 253 // n_shift is the multiplier for lf_deltas |
254 // the multiplier is 1 for when filter_lvl is between 0 and 31; | 254 // the multiplier is 1 for when filter_lvl is between 0 and 31; |
255 // 2 when filter_lvl is between 32 and 63 | 255 // 2 when filter_lvl is between 32 and 63 |
256 const int scale = 1 << (default_filt_lvl >> 5); | 256 const int scale = 1 << (default_filt_lvl >> 5); |
257 loop_filter_info_n *const lfi = &cm->lf_info; | 257 loop_filter_info_n *const lfi = &cm->lf_info; |
258 struct loopfilter *const lf = &cm->lf; | 258 struct loopfilter *const lf = &cm->lf; |
(...skipping 10 matching lines...) Expand all Loading... |
269 if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { | 269 if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { |
270 const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); | 270 const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); |
271 lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? | 271 lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? |
272 data : default_filt_lvl + data, | 272 data : default_filt_lvl + data, |
273 0, MAX_LOOP_FILTER); | 273 0, MAX_LOOP_FILTER); |
274 } | 274 } |
275 | 275 |
276 if (!lf->mode_ref_delta_enabled) { | 276 if (!lf->mode_ref_delta_enabled) { |
277 // we could get rid of this if we assume that deltas are set to | 277 // we could get rid of this if we assume that deltas are set to |
278 // zero when not in use; encoder always uses deltas | 278 // zero when not in use; encoder always uses deltas |
279 vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); | 279 memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); |
280 } else { | 280 } else { |
281 int ref, mode; | 281 int ref, mode; |
282 const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; | 282 const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; |
283 lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); | 283 lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); |
284 | 284 |
285 for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { | 285 for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { |
286 for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { | 286 for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { |
287 const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale | 287 const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale |
288 + lf->mode_deltas[mode] * scale; | 288 + lf->mode_deltas[mode] * scale; |
289 lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); | 289 lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); |
290 } | 290 } |
291 } | 291 } |
292 } | 292 } |
293 } | 293 } |
294 } | 294 } |
295 | 295 |
296 static void filter_selectively_vert_row2(PLANE_TYPE plane_type, | 296 static void filter_selectively_vert_row2(int subsampling_factor, |
297 uint8_t *s, int pitch, | 297 uint8_t *s, int pitch, |
298 unsigned int mask_16x16_l, | 298 unsigned int mask_16x16_l, |
299 unsigned int mask_8x8_l, | 299 unsigned int mask_8x8_l, |
300 unsigned int mask_4x4_l, | 300 unsigned int mask_4x4_l, |
301 unsigned int mask_4x4_int_l, | 301 unsigned int mask_4x4_int_l, |
302 const loop_filter_info_n *lfi_n, | 302 const loop_filter_info_n *lfi_n, |
303 const uint8_t *lfl) { | 303 const uint8_t *lfl) { |
304 const int mask_shift = plane_type ? 4 : 8; | 304 const int mask_shift = subsampling_factor ? 4 : 8; |
305 const int mask_cutoff = plane_type ? 0xf : 0xff; | 305 const int mask_cutoff = subsampling_factor ? 0xf : 0xff; |
306 const int lfl_forward = plane_type ? 4 : 8; | 306 const int lfl_forward = subsampling_factor ? 4 : 8; |
307 | 307 |
308 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; | 308 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; |
309 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; | 309 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; |
310 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; | 310 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; |
311 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; | 311 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; |
312 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; | 312 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; |
313 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; | 313 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; |
314 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; | 314 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; |
315 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; | 315 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; |
316 unsigned int mask; | 316 unsigned int mask; |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
386 mask_4x4_0 >>= 1; | 386 mask_4x4_0 >>= 1; |
387 mask_4x4_int_0 >>= 1; | 387 mask_4x4_int_0 >>= 1; |
388 mask_16x16_1 >>= 1; | 388 mask_16x16_1 >>= 1; |
389 mask_8x8_1 >>= 1; | 389 mask_8x8_1 >>= 1; |
390 mask_4x4_1 >>= 1; | 390 mask_4x4_1 >>= 1; |
391 mask_4x4_int_1 >>= 1; | 391 mask_4x4_int_1 >>= 1; |
392 } | 392 } |
393 } | 393 } |
394 | 394 |
395 #if CONFIG_VP9_HIGHBITDEPTH | 395 #if CONFIG_VP9_HIGHBITDEPTH |
396 static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, | 396 static void highbd_filter_selectively_vert_row2(int subsampling_factor, |
397 uint16_t *s, int pitch, | 397 uint16_t *s, int pitch, |
398 unsigned int mask_16x16_l, | 398 unsigned int mask_16x16_l, |
399 unsigned int mask_8x8_l, | 399 unsigned int mask_8x8_l, |
400 unsigned int mask_4x4_l, | 400 unsigned int mask_4x4_l, |
401 unsigned int mask_4x4_int_l, | 401 unsigned int mask_4x4_int_l, |
402 const loop_filter_info_n *lfi_n, | 402 const loop_filter_info_n *lfi_n, |
403 const uint8_t *lfl, int bd) { | 403 const uint8_t *lfl, int bd) { |
404 const int mask_shift = plane_type ? 4 : 8; | 404 const int mask_shift = subsampling_factor ? 4 : 8; |
405 const int mask_cutoff = plane_type ? 0xf : 0xff; | 405 const int mask_cutoff = subsampling_factor ? 0xf : 0xff; |
406 const int lfl_forward = plane_type ? 4 : 8; | 406 const int lfl_forward = subsampling_factor ? 4 : 8; |
407 | 407 |
408 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; | 408 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; |
409 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; | 409 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; |
410 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; | 410 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; |
411 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; | 411 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; |
412 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; | 412 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; |
413 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; | 413 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; |
414 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; | 414 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; |
415 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; | 415 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; |
416 unsigned int mask; | 416 unsigned int mask; |
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
720 int i; | 720 int i; |
721 | 721 |
722 // If filter level is 0 we don't loop filter. | 722 // If filter level is 0 we don't loop filter. |
723 if (!filter_level) { | 723 if (!filter_level) { |
724 return; | 724 return; |
725 } else { | 725 } else { |
726 const int w = num_8x8_blocks_wide_lookup[block_size]; | 726 const int w = num_8x8_blocks_wide_lookup[block_size]; |
727 const int h = num_8x8_blocks_high_lookup[block_size]; | 727 const int h = num_8x8_blocks_high_lookup[block_size]; |
728 int index = shift_y; | 728 int index = shift_y; |
729 for (i = 0; i < h; i++) { | 729 for (i = 0; i < h; i++) { |
730 vpx_memset(&lfm->lfl_y[index], filter_level, w); | 730 memset(&lfm->lfl_y[index], filter_level, w); |
731 index += 8; | 731 index += 8; |
732 } | 732 } |
733 } | 733 } |
734 | 734 |
735 // These set 1 in the current block size for the block size edges. | 735 // These set 1 in the current block size for the block size edges. |
736 // For instance if the block size is 32x16, we'll set: | 736 // For instance if the block size is 32x16, we'll set: |
737 // above = 1111 | 737 // above = 1111 |
738 // 0000 | 738 // 0000 |
739 // and | 739 // and |
740 // left = 1000 | 740 // left = 1000 |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
794 uint64_t *const int_4x4_y = &lfm->int_4x4_y; | 794 uint64_t *const int_4x4_y = &lfm->int_4x4_y; |
795 int i; | 795 int i; |
796 | 796 |
797 if (!filter_level) { | 797 if (!filter_level) { |
798 return; | 798 return; |
799 } else { | 799 } else { |
800 const int w = num_8x8_blocks_wide_lookup[block_size]; | 800 const int w = num_8x8_blocks_wide_lookup[block_size]; |
801 const int h = num_8x8_blocks_high_lookup[block_size]; | 801 const int h = num_8x8_blocks_high_lookup[block_size]; |
802 int index = shift_y; | 802 int index = shift_y; |
803 for (i = 0; i < h; i++) { | 803 for (i = 0; i < h; i++) { |
804 vpx_memset(&lfm->lfl_y[index], filter_level, w); | 804 memset(&lfm->lfl_y[index], filter_level, w); |
805 index += 8; | 805 index += 8; |
806 } | 806 } |
807 } | 807 } |
808 | 808 |
809 *above_y |= above_prediction_mask[block_size] << shift_y; | 809 *above_y |= above_prediction_mask[block_size] << shift_y; |
810 *left_y |= left_prediction_mask[block_size] << shift_y; | 810 *left_y |= left_prediction_mask[block_size] << shift_y; |
811 | 811 |
812 if (mbmi->skip && is_inter_block(mbmi)) | 812 if (mbmi->skip && is_inter_block(mbmi)) |
813 return; | 813 return; |
814 | 814 |
815 *above_y |= (size_mask[block_size] & | 815 *above_y |= (size_mask[block_size] & |
816 above_64x64_txform_mask[tx_size_y]) << shift_y; | 816 above_64x64_txform_mask[tx_size_y]) << shift_y; |
817 | 817 |
818 *left_y |= (size_mask[block_size] & | 818 *left_y |= (size_mask[block_size] & |
819 left_64x64_txform_mask[tx_size_y]) << shift_y; | 819 left_64x64_txform_mask[tx_size_y]) << shift_y; |
820 | 820 |
821 if (tx_size_y == TX_4X4) | 821 if (tx_size_y == TX_4X4) |
822 *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y; | 822 *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y; |
823 } | 823 } |
824 | 824 |
825 // This function sets up the bit masks for the entire 64x64 region represented | 825 // This function sets up the bit masks for the entire 64x64 region represented |
826 // by mi_row, mi_col. | 826 // by mi_row, mi_col. |
827 // TODO(JBB): This function only works for yv12. | 827 // TODO(JBB): This function only works for yv12. |
828 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, | 828 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, |
829 MODE_INFO *mi, const int mode_info_stride, | 829 MODE_INFO **mi, const int mode_info_stride, |
830 LOOP_FILTER_MASK *lfm) { | 830 LOOP_FILTER_MASK *lfm) { |
831 int idx_32, idx_16, idx_8; | 831 int idx_32, idx_16, idx_8; |
832 const loop_filter_info_n *const lfi_n = &cm->lf_info; | 832 const loop_filter_info_n *const lfi_n = &cm->lf_info; |
833 MODE_INFO *mip = mi; | 833 MODE_INFO **mip = mi; |
834 MODE_INFO *mip2 = mi; | 834 MODE_INFO **mip2 = mi; |
835 | 835 |
836 // These are offsets to the next mi in the 64x64 block. It is what gets | 836 // These are offsets to the next mi in the 64x64 block. It is what gets |
837 // added to the mi ptr as we go through each loop. It helps us to avoid | 837 // added to the mi ptr as we go through each loop. It helps us to avoid |
838 // setting up special row and column counters for each index. The last step | 838 // setting up special row and column counters for each index. The last step |
839 // brings us out back to the starting position. | 839 // brings us out back to the starting position. |
840 const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4, | 840 const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4, |
841 -(mode_info_stride << 2) - 4}; | 841 -(mode_info_stride << 2) - 4}; |
842 const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2, | 842 const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2, |
843 -(mode_info_stride << 1) - 2}; | 843 -(mode_info_stride << 1) - 2}; |
844 const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1}; | 844 const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1}; |
845 | 845 |
846 // Following variables represent shifts to position the current block | 846 // Following variables represent shifts to position the current block |
847 // mask over the appropriate block. A shift of 36 to the left will move | 847 // mask over the appropriate block. A shift of 36 to the left will move |
848 // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left | 848 // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left |
849 // 4 rows to the appropriate spot. | 849 // 4 rows to the appropriate spot. |
850 const int shift_32_y[] = {0, 4, 32, 36}; | 850 const int shift_32_y[] = {0, 4, 32, 36}; |
851 const int shift_16_y[] = {0, 2, 16, 18}; | 851 const int shift_16_y[] = {0, 2, 16, 18}; |
852 const int shift_8_y[] = {0, 1, 8, 9}; | 852 const int shift_8_y[] = {0, 1, 8, 9}; |
853 const int shift_32_uv[] = {0, 2, 8, 10}; | 853 const int shift_32_uv[] = {0, 2, 8, 10}; |
854 const int shift_16_uv[] = {0, 1, 4, 5}; | 854 const int shift_16_uv[] = {0, 1, 4, 5}; |
855 int i; | 855 int i; |
856 const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? | 856 const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? |
857 cm->mi_rows - mi_row : MI_BLOCK_SIZE); | 857 cm->mi_rows - mi_row : MI_BLOCK_SIZE); |
858 const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? | 858 const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? |
859 cm->mi_cols - mi_col : MI_BLOCK_SIZE); | 859 cm->mi_cols - mi_col : MI_BLOCK_SIZE); |
860 | 860 |
861 vp9_zero(*lfm); | 861 vp9_zero(*lfm); |
862 assert(mip != NULL); | 862 assert(mip[0] != NULL); |
863 | 863 |
864 // TODO(jimbankoski): Try moving most of the following code into decode | 864 // TODO(jimbankoski): Try moving most of the following code into decode |
865 // loop and storing lfm in the mbmi structure so that we don't have to go | 865 // loop and storing lfm in the mbmi structure so that we don't have to go |
866 // through the recursive loop structure multiple times. | 866 // through the recursive loop structure multiple times. |
867 switch (mip->mbmi.sb_type) { | 867 switch (mip[0]->mbmi.sb_type) { |
868 case BLOCK_64X64: | 868 case BLOCK_64X64: |
869 build_masks(lfi_n, mip , 0, 0, lfm); | 869 build_masks(lfi_n, mip[0] , 0, 0, lfm); |
870 break; | 870 break; |
871 case BLOCK_64X32: | 871 case BLOCK_64X32: |
872 build_masks(lfi_n, mip, 0, 0, lfm); | 872 build_masks(lfi_n, mip[0], 0, 0, lfm); |
873 mip2 = mip + mode_info_stride * 4; | 873 mip2 = mip + mode_info_stride * 4; |
874 if (4 >= max_rows) | 874 if (4 >= max_rows) |
875 break; | 875 break; |
876 build_masks(lfi_n, mip2, 32, 8, lfm); | 876 build_masks(lfi_n, mip2[0], 32, 8, lfm); |
877 break; | 877 break; |
878 case BLOCK_32X64: | 878 case BLOCK_32X64: |
879 build_masks(lfi_n, mip, 0, 0, lfm); | 879 build_masks(lfi_n, mip[0], 0, 0, lfm); |
880 mip2 = mip + 4; | 880 mip2 = mip + 4; |
881 if (4 >= max_cols) | 881 if (4 >= max_cols) |
882 break; | 882 break; |
883 build_masks(lfi_n, mip2, 4, 2, lfm); | 883 build_masks(lfi_n, mip2[0], 4, 2, lfm); |
884 break; | 884 break; |
885 default: | 885 default: |
886 for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { | 886 for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { |
887 const int shift_y = shift_32_y[idx_32]; | 887 const int shift_y = shift_32_y[idx_32]; |
888 const int shift_uv = shift_32_uv[idx_32]; | 888 const int shift_uv = shift_32_uv[idx_32]; |
889 const int mi_32_col_offset = ((idx_32 & 1) << 2); | 889 const int mi_32_col_offset = ((idx_32 & 1) << 2); |
890 const int mi_32_row_offset = ((idx_32 >> 1) << 2); | 890 const int mi_32_row_offset = ((idx_32 >> 1) << 2); |
891 if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) | 891 if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) |
892 continue; | 892 continue; |
893 switch (mip->mbmi.sb_type) { | 893 switch (mip[0]->mbmi.sb_type) { |
894 case BLOCK_32X32: | 894 case BLOCK_32X32: |
895 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); | 895 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); |
896 break; | 896 break; |
897 case BLOCK_32X16: | 897 case BLOCK_32X16: |
898 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); | 898 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); |
899 if (mi_32_row_offset + 2 >= max_rows) | 899 if (mi_32_row_offset + 2 >= max_rows) |
900 continue; | 900 continue; |
901 mip2 = mip + mode_info_stride * 2; | 901 mip2 = mip + mode_info_stride * 2; |
902 build_masks(lfi_n, mip2, shift_y + 16, shift_uv + 4, lfm); | 902 build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); |
903 break; | 903 break; |
904 case BLOCK_16X32: | 904 case BLOCK_16X32: |
905 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); | 905 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); |
906 if (mi_32_col_offset + 2 >= max_cols) | 906 if (mi_32_col_offset + 2 >= max_cols) |
907 continue; | 907 continue; |
908 mip2 = mip + 2; | 908 mip2 = mip + 2; |
909 build_masks(lfi_n, mip2, shift_y + 2, shift_uv + 1, lfm); | 909 build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); |
910 break; | 910 break; |
911 default: | 911 default: |
912 for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { | 912 for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { |
913 const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; | 913 const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; |
914 const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; | 914 const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; |
915 const int mi_16_col_offset = mi_32_col_offset + | 915 const int mi_16_col_offset = mi_32_col_offset + |
916 ((idx_16 & 1) << 1); | 916 ((idx_16 & 1) << 1); |
917 const int mi_16_row_offset = mi_32_row_offset + | 917 const int mi_16_row_offset = mi_32_row_offset + |
918 ((idx_16 >> 1) << 1); | 918 ((idx_16 >> 1) << 1); |
919 | 919 |
920 if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) | 920 if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) |
921 continue; | 921 continue; |
922 | 922 |
923 switch (mip->mbmi.sb_type) { | 923 switch (mip[0]->mbmi.sb_type) { |
924 case BLOCK_16X16: | 924 case BLOCK_16X16: |
925 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); | 925 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); |
926 break; | 926 break; |
927 case BLOCK_16X8: | 927 case BLOCK_16X8: |
928 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); | 928 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); |
929 if (mi_16_row_offset + 1 >= max_rows) | 929 if (mi_16_row_offset + 1 >= max_rows) |
930 continue; | 930 continue; |
931 mip2 = mip + mode_info_stride; | 931 mip2 = mip + mode_info_stride; |
932 build_y_mask(lfi_n, mip2, shift_y+8, lfm); | 932 build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); |
933 break; | 933 break; |
934 case BLOCK_8X16: | 934 case BLOCK_8X16: |
935 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); | 935 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); |
936 if (mi_16_col_offset +1 >= max_cols) | 936 if (mi_16_col_offset +1 >= max_cols) |
937 continue; | 937 continue; |
938 mip2 = mip + 1; | 938 mip2 = mip + 1; |
939 build_y_mask(lfi_n, mip2, shift_y+1, lfm); | 939 build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); |
940 break; | 940 break; |
941 default: { | 941 default: { |
942 const int shift_y = shift_32_y[idx_32] + | 942 const int shift_y = shift_32_y[idx_32] + |
943 shift_16_y[idx_16] + | 943 shift_16_y[idx_16] + |
944 shift_8_y[0]; | 944 shift_8_y[0]; |
945 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); | 945 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); |
946 mip += offset[0]; | 946 mip += offset[0]; |
947 for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { | 947 for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { |
948 const int shift_y = shift_32_y[idx_32] + | 948 const int shift_y = shift_32_y[idx_32] + |
949 shift_16_y[idx_16] + | 949 shift_16_y[idx_16] + |
950 shift_8_y[idx_8]; | 950 shift_8_y[idx_8]; |
951 const int mi_8_col_offset = mi_16_col_offset + | 951 const int mi_8_col_offset = mi_16_col_offset + |
952 ((idx_8 & 1)); | 952 ((idx_8 & 1)); |
953 const int mi_8_row_offset = mi_16_row_offset + | 953 const int mi_8_row_offset = mi_16_row_offset + |
954 ((idx_8 >> 1)); | 954 ((idx_8 >> 1)); |
955 | 955 |
956 if (mi_8_col_offset >= max_cols || | 956 if (mi_8_col_offset >= max_cols || |
957 mi_8_row_offset >= max_rows) | 957 mi_8_row_offset >= max_rows) |
958 continue; | 958 continue; |
959 build_y_mask(lfi_n, mip, shift_y, lfm); | 959 build_y_mask(lfi_n, mip[0], shift_y, lfm); |
960 } | 960 } |
961 break; | 961 break; |
962 } | 962 } |
963 } | 963 } |
964 } | 964 } |
965 break; | 965 break; |
966 } | 966 } |
967 } | 967 } |
968 break; | 968 break; |
969 } | 969 } |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1144 mask_16x16 >>= 1; | 1144 mask_16x16 >>= 1; |
1145 mask_8x8 >>= 1; | 1145 mask_8x8 >>= 1; |
1146 mask_4x4 >>= 1; | 1146 mask_4x4 >>= 1; |
1147 mask_4x4_int >>= 1; | 1147 mask_4x4_int >>= 1; |
1148 } | 1148 } |
1149 } | 1149 } |
1150 #endif // CONFIG_VP9_HIGHBITDEPTH | 1150 #endif // CONFIG_VP9_HIGHBITDEPTH |
1151 | 1151 |
1152 void vp9_filter_block_plane_non420(VP9_COMMON *cm, | 1152 void vp9_filter_block_plane_non420(VP9_COMMON *cm, |
1153 struct macroblockd_plane *plane, | 1153 struct macroblockd_plane *plane, |
1154 MODE_INFO *mi_8x8, | 1154 MODE_INFO **mi_8x8, |
1155 int mi_row, int mi_col) { | 1155 int mi_row, int mi_col) { |
1156 const int ss_x = plane->subsampling_x; | 1156 const int ss_x = plane->subsampling_x; |
1157 const int ss_y = plane->subsampling_y; | 1157 const int ss_y = plane->subsampling_y; |
1158 const int row_step = 1 << ss_y; | 1158 const int row_step = 1 << ss_y; |
1159 const int col_step = 1 << ss_x; | 1159 const int col_step = 1 << ss_x; |
1160 const int row_step_stride = cm->mi_stride * row_step; | 1160 const int row_step_stride = cm->mi_stride * row_step; |
1161 struct buf_2d *const dst = &plane->dst; | 1161 struct buf_2d *const dst = &plane->dst; |
1162 uint8_t* const dst0 = dst->buf; | 1162 uint8_t* const dst0 = dst->buf; |
1163 unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; | 1163 unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; |
1164 unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; | 1164 unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; |
1165 unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; | 1165 unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; |
1166 unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; | 1166 unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; |
1167 uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; | 1167 uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; |
1168 int r, c; | 1168 int r, c; |
1169 | 1169 |
1170 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { | 1170 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { |
1171 unsigned int mask_16x16_c = 0; | 1171 unsigned int mask_16x16_c = 0; |
1172 unsigned int mask_8x8_c = 0; | 1172 unsigned int mask_8x8_c = 0; |
1173 unsigned int mask_4x4_c = 0; | 1173 unsigned int mask_4x4_c = 0; |
1174 unsigned int border_mask; | 1174 unsigned int border_mask; |
1175 | 1175 |
1176 // Determine the vertical edges that need filtering | 1176 // Determine the vertical edges that need filtering |
1177 for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { | 1177 for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { |
1178 const MODE_INFO *mi = mi_8x8[c].src_mi; | 1178 const MODE_INFO *mi = mi_8x8[c]; |
1179 const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; | 1179 const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; |
1180 const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); | 1180 const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); |
1181 // left edge of current unit is block/partition edge -> no skip | 1181 // left edge of current unit is block/partition edge -> no skip |
1182 const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? | 1182 const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? |
1183 !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; | 1183 !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; |
1184 const int skip_this_c = skip_this && !block_edge_left; | 1184 const int skip_this_c = skip_this && !block_edge_left; |
1185 // top edge of current unit is block/partition edge -> no skip | 1185 // top edge of current unit is block/partition edge -> no skip |
1186 const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? | 1186 const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? |
1187 !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; | 1187 !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; |
1188 const int skip_this_r = skip_this && !block_edge_above; | 1188 const int skip_this_r = skip_this && !block_edge_above; |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1319 mask_16x16_r, | 1319 mask_16x16_r, |
1320 mask_8x8_r, | 1320 mask_8x8_r, |
1321 mask_4x4_r, | 1321 mask_4x4_r, |
1322 mask_4x4_int_r, | 1322 mask_4x4_int_r, |
1323 &cm->lf_info, &lfl[r << 3]); | 1323 &cm->lf_info, &lfl[r << 3]); |
1324 #endif // CONFIG_VP9_HIGHBITDEPTH | 1324 #endif // CONFIG_VP9_HIGHBITDEPTH |
1325 dst->buf += 8 * dst->stride; | 1325 dst->buf += 8 * dst->stride; |
1326 } | 1326 } |
1327 } | 1327 } |
1328 | 1328 |
1329 void vp9_filter_block_plane(VP9_COMMON *const cm, | 1329 void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, |
1330 struct macroblockd_plane *const plane, | 1330 struct macroblockd_plane *const plane, |
1331 int mi_row, | 1331 int mi_row, |
1332 LOOP_FILTER_MASK *lfm) { | 1332 LOOP_FILTER_MASK *lfm) { |
1333 struct buf_2d *const dst = &plane->dst; | 1333 struct buf_2d *const dst = &plane->dst; |
1334 uint8_t* const dst0 = dst->buf; | 1334 uint8_t *const dst0 = dst->buf; |
| 1335 int r; |
| 1336 uint64_t mask_16x16 = lfm->left_y[TX_16X16]; |
| 1337 uint64_t mask_8x8 = lfm->left_y[TX_8X8]; |
| 1338 uint64_t mask_4x4 = lfm->left_y[TX_4X4]; |
| 1339 uint64_t mask_4x4_int = lfm->int_4x4_y; |
| 1340 |
| 1341 assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); |
| 1342 |
| 1343 // Vertical pass: do 2 rows at one time |
| 1344 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { |
| 1345 unsigned int mask_16x16_l = mask_16x16 & 0xffff; |
| 1346 unsigned int mask_8x8_l = mask_8x8 & 0xffff; |
| 1347 unsigned int mask_4x4_l = mask_4x4 & 0xffff; |
| 1348 unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; |
| 1349 |
| 1350 // Disable filtering on the leftmost column. |
| 1351 #if CONFIG_VP9_HIGHBITDEPTH |
| 1352 if (cm->use_highbitdepth) { |
| 1353 highbd_filter_selectively_vert_row2( |
| 1354 plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, |
| 1355 mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, |
| 1356 &lfm->lfl_y[r << 3], (int)cm->bit_depth); |
| 1357 } else { |
| 1358 filter_selectively_vert_row2( |
| 1359 plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, |
| 1360 mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); |
| 1361 } |
| 1362 #else |
| 1363 filter_selectively_vert_row2( |
| 1364 plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, |
| 1365 mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); |
| 1366 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 1367 dst->buf += 16 * dst->stride; |
| 1368 mask_16x16 >>= 16; |
| 1369 mask_8x8 >>= 16; |
| 1370 mask_4x4 >>= 16; |
| 1371 mask_4x4_int >>= 16; |
| 1372 } |
| 1373 |
| 1374 // Horizontal pass |
| 1375 dst->buf = dst0; |
| 1376 mask_16x16 = lfm->above_y[TX_16X16]; |
| 1377 mask_8x8 = lfm->above_y[TX_8X8]; |
| 1378 mask_4x4 = lfm->above_y[TX_4X4]; |
| 1379 mask_4x4_int = lfm->int_4x4_y; |
| 1380 |
| 1381 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { |
| 1382 unsigned int mask_16x16_r; |
| 1383 unsigned int mask_8x8_r; |
| 1384 unsigned int mask_4x4_r; |
| 1385 |
| 1386 if (mi_row + r == 0) { |
| 1387 mask_16x16_r = 0; |
| 1388 mask_8x8_r = 0; |
| 1389 mask_4x4_r = 0; |
| 1390 } else { |
| 1391 mask_16x16_r = mask_16x16 & 0xff; |
| 1392 mask_8x8_r = mask_8x8 & 0xff; |
| 1393 mask_4x4_r = mask_4x4 & 0xff; |
| 1394 } |
| 1395 |
| 1396 #if CONFIG_VP9_HIGHBITDEPTH |
| 1397 if (cm->use_highbitdepth) { |
| 1398 highbd_filter_selectively_horiz( |
| 1399 CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, |
| 1400 mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], |
| 1401 (int)cm->bit_depth); |
| 1402 } else { |
| 1403 filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, |
| 1404 mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, |
| 1405 &lfm->lfl_y[r << 3]); |
| 1406 } |
| 1407 #else |
| 1408 filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, |
| 1409 mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, |
| 1410 &lfm->lfl_y[r << 3]); |
| 1411 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 1412 |
| 1413 dst->buf += 8 * dst->stride; |
| 1414 mask_16x16 >>= 8; |
| 1415 mask_8x8 >>= 8; |
| 1416 mask_4x4 >>= 8; |
| 1417 mask_4x4_int >>= 8; |
| 1418 } |
| 1419 } |
| 1420 |
// Loop-filter one 64x64 superblock row of a chroma plane with 4:2:0
// subsampling (both subsampling_x and subsampling_y equal to 1).
// |lfm| holds the precomputed per-edge filter bitmasks for this superblock;
// each set bit selects an 8x8 chroma edge to filter at the corresponding
// transform size.  dst->buf is advanced through the block during each pass
// and restored from |dst0| between the vertical and horizontal passes.
void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
                                 struct macroblockd_plane *const plane,
                                 int mi_row,
                                 LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;  // saved to rewind for the horizontal pass
  int r, c;

  // Left-edge (vertical-edge) masks for the chroma plane; 16 bits cover the
  // 4x4 grid of 8x8 chroma units in a 64x64 luma superblock.
  uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
  uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
  uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
  uint16_t mask_4x4_int = lfm->int_4x4_uv;

  // This routine is only valid for 4:2:0 chroma.
  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);

  // Vertical pass: do 2 rows at one time
  // (r steps by 4 mi units, i.e. 2 chroma rows of 8 pixels each).
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
    if (plane->plane_type == 1) {
      // Build the chroma filter-level table by subsampling the luma levels:
      // take every other column of lfl_y for the two chroma rows covered.
      for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
        lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
        lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
      }
    }

    {
      // Low 8 mask bits select the two chroma rows handled this iteration.
      unsigned int mask_16x16_l = mask_16x16 & 0xff;
      unsigned int mask_8x8_l = mask_8x8 & 0xff;
      unsigned int mask_4x4_l = mask_4x4 & 0xff;
      unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;

      // Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        highbd_filter_selectively_vert_row2(
            plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
            &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
      } else {
        filter_selectively_vert_row2(
            plane->subsampling_x, dst->buf, dst->stride,
            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
            &lfm->lfl_uv[r << 1]);
      }
#else
      filter_selectively_vert_row2(
          plane->subsampling_x, dst->buf, dst->stride,
          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
          &lfm->lfl_uv[r << 1]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      // Advance two chroma rows (16 pixels) and consume 8 mask bits.
      dst->buf += 16 * dst->stride;
      mask_16x16 >>= 8;
      mask_8x8 >>= 8;
      mask_4x4 >>= 8;
      mask_4x4_int >>= 8;
    }
  }

  // Horizontal pass
  dst->buf = dst0;
  mask_16x16 = lfm->above_uv[TX_16X16];
  mask_8x8 = lfm->above_uv[TX_8X8];
  mask_4x4 = lfm->above_uv[TX_4X4];
  mask_4x4_int = lfm->int_4x4_uv;

  // One chroma row of 8 pixels (2 mi units) per iteration.
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
    // Skip the interior 4x4 edge on the frame's last mi row: there is no
    // row below it to pair with.
    const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r =
        skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      // Disable filtering on the topmost row of the frame.
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      // Low 4 mask bits select the current chroma row.
      mask_16x16_r = mask_16x16 & 0xf;
      mask_8x8_r = mask_8x8 & 0xf;
      mask_4x4_r = mask_4x4 & 0xf;
    }

#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                      dst->stride, mask_16x16_r, mask_8x8_r,
                                      mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                                      &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
    } else {
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                               &lfm->lfl_uv[r << 1]);
    }
#else
    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                             &lfm->lfl_uv[r << 1]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    // Advance one chroma row (8 pixels) and consume 4 mask bits.
    dst->buf += 8 * dst->stride;
    mask_16x16 >>= 4;
    mask_8x8 >>= 4;
    mask_4x4 >>= 4;
    mask_4x4_int >>= 4;
  }
}
1573 | 1528 |
1574 void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, | 1529 void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, |
1575 VP9_COMMON *cm, | 1530 VP9_COMMON *cm, |
1576 struct macroblockd_plane planes[MAX_MB_PLANE], | 1531 struct macroblockd_plane planes[MAX_MB_PLANE], |
1577 int start, int stop, int y_only) { | 1532 int start, int stop, int y_only) { |
1578 const int num_planes = y_only ? 1 : MAX_MB_PLANE; | 1533 const int num_planes = y_only ? 1 : MAX_MB_PLANE; |
1579 const int use_420 = y_only || (planes[1].subsampling_y == 1 && | 1534 enum lf_path path; |
1580 planes[1].subsampling_x == 1); | |
1581 LOOP_FILTER_MASK lfm; | 1535 LOOP_FILTER_MASK lfm; |
1582 int mi_row, mi_col; | 1536 int mi_row, mi_col; |
1583 | 1537 |
| 1538 if (y_only) |
| 1539 path = LF_PATH_444; |
| 1540 else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) |
| 1541 path = LF_PATH_420; |
| 1542 else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) |
| 1543 path = LF_PATH_444; |
| 1544 else |
| 1545 path = LF_PATH_SLOW; |
| 1546 |
1584 for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { | 1547 for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { |
1585 MODE_INFO *mi = cm->mi + mi_row * cm->mi_stride; | 1548 MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; |
1586 | 1549 |
1587 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { | 1550 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { |
1588 int plane; | 1551 int plane; |
1589 | 1552 |
1590 vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); | 1553 vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); |
1591 | 1554 |
1592 // TODO(JBB): Make setup_mask work for non 420. | 1555 // TODO(JBB): Make setup_mask work for non 420. |
1593 if (use_420) | 1556 vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, |
1594 vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, | 1557 &lfm); |
1595 &lfm); | |
1596 | 1558 |
1597 for (plane = 0; plane < num_planes; ++plane) { | 1559 vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); |
1598 if (use_420) | 1560 for (plane = 1; plane < num_planes; ++plane) { |
1599 vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); | 1561 switch (path) { |
1600 else | 1562 case LF_PATH_420: |
1601 vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, | 1563 vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); |
1602 mi_row, mi_col); | 1564 break; |
| 1565 case LF_PATH_444: |
| 1566 vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); |
| 1567 break; |
| 1568 case LF_PATH_SLOW: |
| 1569 vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, |
| 1570 mi_row, mi_col); |
| 1571 break; |
| 1572 } |
1603 } | 1573 } |
1604 } | 1574 } |
1605 } | 1575 } |
1606 } | 1576 } |
1607 | 1577 |
1608 void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, | 1578 void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, |
1609 VP9_COMMON *cm, MACROBLOCKD *xd, | 1579 VP9_COMMON *cm, MACROBLOCKD *xd, |
1610 int frame_filter_level, | 1580 int frame_filter_level, |
1611 int y_only, int partial_frame) { | 1581 int y_only, int partial_frame) { |
1612 int start_mi_row, end_mi_row, mi_rows_to_filter; | 1582 int start_mi_row, end_mi_row, mi_rows_to_filter; |
(...skipping 13 matching lines...) Expand all Loading... |
1626 } | 1596 } |
1627 | 1597 |
1628 void vp9_loop_filter_data_reset( | 1598 void vp9_loop_filter_data_reset( |
1629 LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, | 1599 LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, |
1630 struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { | 1600 struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { |
1631 lf_data->frame_buffer = frame_buffer; | 1601 lf_data->frame_buffer = frame_buffer; |
1632 lf_data->cm = cm; | 1602 lf_data->cm = cm; |
1633 lf_data->start = 0; | 1603 lf_data->start = 0; |
1634 lf_data->stop = 0; | 1604 lf_data->stop = 0; |
1635 lf_data->y_only = 0; | 1605 lf_data->y_only = 0; |
1636 vpx_memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); | 1606 memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); |
1637 } | 1607 } |
1638 | 1608 |
1639 int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { | 1609 int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { |
1640 (void)unused; | 1610 (void)unused; |
1641 vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, | 1611 vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, |
1642 lf_data->start, lf_data->stop, lf_data->y_only); | 1612 lf_data->start, lf_data->stop, lf_data->y_only); |
1643 return 1; | 1613 return 1; |
1644 } | 1614 } |
OLD | NEW |