Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(283)

Side by Side Diff: source/libvpx/vp9/common/vp9_loopfilter.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last nights LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vp9/common/vp9_loopfilter.h ('k') | source/libvpx/vp9/common/vp9_mfqe.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after
215 int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); 215 int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
216 216
217 if (sharpness_lvl > 0) { 217 if (sharpness_lvl > 0) {
218 if (block_inside_limit > (9 - sharpness_lvl)) 218 if (block_inside_limit > (9 - sharpness_lvl))
219 block_inside_limit = (9 - sharpness_lvl); 219 block_inside_limit = (9 - sharpness_lvl);
220 } 220 }
221 221
222 if (block_inside_limit < 1) 222 if (block_inside_limit < 1)
223 block_inside_limit = 1; 223 block_inside_limit = 1;
224 224
225 vpx_memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); 225 memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
226 vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), 226 memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
227 SIMD_WIDTH); 227 SIMD_WIDTH);
228 } 228 }
229 } 229 }
230 230
231 static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, 231 static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
232 const MB_MODE_INFO *mbmi) { 232 const MB_MODE_INFO *mbmi) {
233 return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] 233 return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]]
234 [mode_lf_lut[mbmi->mode]]; 234 [mode_lf_lut[mbmi->mode]];
235 } 235 }
236 236
237 void vp9_loop_filter_init(VP9_COMMON *cm) { 237 void vp9_loop_filter_init(VP9_COMMON *cm) {
238 loop_filter_info_n *lfi = &cm->lf_info; 238 loop_filter_info_n *lfi = &cm->lf_info;
239 struct loopfilter *lf = &cm->lf; 239 struct loopfilter *lf = &cm->lf;
240 int lvl; 240 int lvl;
241 241
242 // init limits for given sharpness 242 // init limits for given sharpness
243 update_sharpness(lfi, lf->sharpness_level); 243 update_sharpness(lfi, lf->sharpness_level);
244 lf->last_sharpness_level = lf->sharpness_level; 244 lf->last_sharpness_level = lf->sharpness_level;
245 245
246 // init hev threshold const vectors 246 // init hev threshold const vectors
247 for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) 247 for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
248 vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); 248 memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
249 } 249 }
250 250
251 void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { 251 void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
252 int seg_id; 252 int seg_id;
253 // scale is the multiplier for lf_deltas: 253 // scale is the multiplier for lf_deltas:
254 // the multiplier is 1 when default_filt_lvl is between 0 and 31; 254 // the multiplier is 1 when default_filt_lvl is between 0 and 31;
255 // 2 when default_filt_lvl is between 32 and 63 255 // 2 when default_filt_lvl is between 32 and 63
256 const int scale = 1 << (default_filt_lvl >> 5); 256 const int scale = 1 << (default_filt_lvl >> 5);
257 loop_filter_info_n *const lfi = &cm->lf_info; 257 loop_filter_info_n *const lfi = &cm->lf_info;
258 struct loopfilter *const lf = &cm->lf; 258 struct loopfilter *const lf = &cm->lf;
(...skipping 10 matching lines...) Expand all
269 if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { 269 if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
270 const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); 270 const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
271 lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? 271 lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
272 data : default_filt_lvl + data, 272 data : default_filt_lvl + data,
273 0, MAX_LOOP_FILTER); 273 0, MAX_LOOP_FILTER);
274 } 274 }
275 275
276 if (!lf->mode_ref_delta_enabled) { 276 if (!lf->mode_ref_delta_enabled) {
277 // we could get rid of this if we assume that deltas are set to 277 // we could get rid of this if we assume that deltas are set to
278 // zero when not in use; encoder always uses deltas 278 // zero when not in use; encoder always uses deltas
279 vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); 279 memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
280 } else { 280 } else {
281 int ref, mode; 281 int ref, mode;
282 const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; 282 const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
283 lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); 283 lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
284 284
285 for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { 285 for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
286 for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { 286 for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
287 const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale 287 const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale
288 + lf->mode_deltas[mode] * scale; 288 + lf->mode_deltas[mode] * scale;
289 lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); 289 lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
290 } 290 }
291 } 291 }
292 } 292 }
293 } 293 }
294 } 294 }
295 295
296 static void filter_selectively_vert_row2(PLANE_TYPE plane_type, 296 static void filter_selectively_vert_row2(int subsampling_factor,
297 uint8_t *s, int pitch, 297 uint8_t *s, int pitch,
298 unsigned int mask_16x16_l, 298 unsigned int mask_16x16_l,
299 unsigned int mask_8x8_l, 299 unsigned int mask_8x8_l,
300 unsigned int mask_4x4_l, 300 unsigned int mask_4x4_l,
301 unsigned int mask_4x4_int_l, 301 unsigned int mask_4x4_int_l,
302 const loop_filter_info_n *lfi_n, 302 const loop_filter_info_n *lfi_n,
303 const uint8_t *lfl) { 303 const uint8_t *lfl) {
304 const int mask_shift = plane_type ? 4 : 8; 304 const int mask_shift = subsampling_factor ? 4 : 8;
305 const int mask_cutoff = plane_type ? 0xf : 0xff; 305 const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
306 const int lfl_forward = plane_type ? 4 : 8; 306 const int lfl_forward = subsampling_factor ? 4 : 8;
307 307
308 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; 308 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
309 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; 309 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
310 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; 310 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
311 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; 311 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
312 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; 312 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
313 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; 313 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
314 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; 314 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
315 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; 315 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
316 unsigned int mask; 316 unsigned int mask;
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
386 mask_4x4_0 >>= 1; 386 mask_4x4_0 >>= 1;
387 mask_4x4_int_0 >>= 1; 387 mask_4x4_int_0 >>= 1;
388 mask_16x16_1 >>= 1; 388 mask_16x16_1 >>= 1;
389 mask_8x8_1 >>= 1; 389 mask_8x8_1 >>= 1;
390 mask_4x4_1 >>= 1; 390 mask_4x4_1 >>= 1;
391 mask_4x4_int_1 >>= 1; 391 mask_4x4_int_1 >>= 1;
392 } 392 }
393 } 393 }
394 394
395 #if CONFIG_VP9_HIGHBITDEPTH 395 #if CONFIG_VP9_HIGHBITDEPTH
396 static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, 396 static void highbd_filter_selectively_vert_row2(int subsampling_factor,
397 uint16_t *s, int pitch, 397 uint16_t *s, int pitch,
398 unsigned int mask_16x16_l, 398 unsigned int mask_16x16_l,
399 unsigned int mask_8x8_l, 399 unsigned int mask_8x8_l,
400 unsigned int mask_4x4_l, 400 unsigned int mask_4x4_l,
401 unsigned int mask_4x4_int_l, 401 unsigned int mask_4x4_int_l,
402 const loop_filter_info_n *lfi_n, 402 const loop_filter_info_n *lfi_n,
403 const uint8_t *lfl, int bd) { 403 const uint8_t *lfl, int bd) {
404 const int mask_shift = plane_type ? 4 : 8; 404 const int mask_shift = subsampling_factor ? 4 : 8;
405 const int mask_cutoff = plane_type ? 0xf : 0xff; 405 const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
406 const int lfl_forward = plane_type ? 4 : 8; 406 const int lfl_forward = subsampling_factor ? 4 : 8;
407 407
408 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; 408 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
409 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; 409 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
410 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; 410 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
411 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; 411 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
412 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; 412 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
413 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; 413 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
414 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; 414 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
415 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; 415 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
416 unsigned int mask; 416 unsigned int mask;
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after
720 int i; 720 int i;
721 721
722 // If filter level is 0 we don't loop filter. 722 // If filter level is 0 we don't loop filter.
723 if (!filter_level) { 723 if (!filter_level) {
724 return; 724 return;
725 } else { 725 } else {
726 const int w = num_8x8_blocks_wide_lookup[block_size]; 726 const int w = num_8x8_blocks_wide_lookup[block_size];
727 const int h = num_8x8_blocks_high_lookup[block_size]; 727 const int h = num_8x8_blocks_high_lookup[block_size];
728 int index = shift_y; 728 int index = shift_y;
729 for (i = 0; i < h; i++) { 729 for (i = 0; i < h; i++) {
730 vpx_memset(&lfm->lfl_y[index], filter_level, w); 730 memset(&lfm->lfl_y[index], filter_level, w);
731 index += 8; 731 index += 8;
732 } 732 }
733 } 733 }
734 734
735 // These set 1 in the current block size for the block size edges. 735 // These set 1 in the current block size for the block size edges.
736 // For instance if the block size is 32x16, we'll set: 736 // For instance if the block size is 32x16, we'll set:
737 // above = 1111 737 // above = 1111
738 // 0000 738 // 0000
739 // and 739 // and
740 // left = 1000 740 // left = 1000
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
794 uint64_t *const int_4x4_y = &lfm->int_4x4_y; 794 uint64_t *const int_4x4_y = &lfm->int_4x4_y;
795 int i; 795 int i;
796 796
797 if (!filter_level) { 797 if (!filter_level) {
798 return; 798 return;
799 } else { 799 } else {
800 const int w = num_8x8_blocks_wide_lookup[block_size]; 800 const int w = num_8x8_blocks_wide_lookup[block_size];
801 const int h = num_8x8_blocks_high_lookup[block_size]; 801 const int h = num_8x8_blocks_high_lookup[block_size];
802 int index = shift_y; 802 int index = shift_y;
803 for (i = 0; i < h; i++) { 803 for (i = 0; i < h; i++) {
804 vpx_memset(&lfm->lfl_y[index], filter_level, w); 804 memset(&lfm->lfl_y[index], filter_level, w);
805 index += 8; 805 index += 8;
806 } 806 }
807 } 807 }
808 808
809 *above_y |= above_prediction_mask[block_size] << shift_y; 809 *above_y |= above_prediction_mask[block_size] << shift_y;
810 *left_y |= left_prediction_mask[block_size] << shift_y; 810 *left_y |= left_prediction_mask[block_size] << shift_y;
811 811
812 if (mbmi->skip && is_inter_block(mbmi)) 812 if (mbmi->skip && is_inter_block(mbmi))
813 return; 813 return;
814 814
815 *above_y |= (size_mask[block_size] & 815 *above_y |= (size_mask[block_size] &
816 above_64x64_txform_mask[tx_size_y]) << shift_y; 816 above_64x64_txform_mask[tx_size_y]) << shift_y;
817 817
818 *left_y |= (size_mask[block_size] & 818 *left_y |= (size_mask[block_size] &
819 left_64x64_txform_mask[tx_size_y]) << shift_y; 819 left_64x64_txform_mask[tx_size_y]) << shift_y;
820 820
821 if (tx_size_y == TX_4X4) 821 if (tx_size_y == TX_4X4)
822 *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y; 822 *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
823 } 823 }
824 824
825 // This function sets up the bit masks for the entire 64x64 region represented 825 // This function sets up the bit masks for the entire 64x64 region represented
826 // by mi_row, mi_col. 826 // by mi_row, mi_col.
827 // TODO(JBB): This function only works for yv12. 827 // TODO(JBB): This function only works for yv12.
828 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, 828 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
829 MODE_INFO *mi, const int mode_info_stride, 829 MODE_INFO **mi, const int mode_info_stride,
830 LOOP_FILTER_MASK *lfm) { 830 LOOP_FILTER_MASK *lfm) {
831 int idx_32, idx_16, idx_8; 831 int idx_32, idx_16, idx_8;
832 const loop_filter_info_n *const lfi_n = &cm->lf_info; 832 const loop_filter_info_n *const lfi_n = &cm->lf_info;
833 MODE_INFO *mip = mi; 833 MODE_INFO **mip = mi;
834 MODE_INFO *mip2 = mi; 834 MODE_INFO **mip2 = mi;
835 835
836 // These are offsets to the next mi in the 64x64 block. It is what gets 836 // These are offsets to the next mi in the 64x64 block. It is what gets
837 // added to the mi ptr as we go through each loop. It helps us to avoid 837 // added to the mi ptr as we go through each loop. It helps us to avoid
838 // setting up special row and column counters for each index. The last step 838 // setting up special row and column counters for each index. The last step
839 // brings us out back to the starting position. 839 // brings us out back to the starting position.
840 const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4, 840 const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4,
841 -(mode_info_stride << 2) - 4}; 841 -(mode_info_stride << 2) - 4};
842 const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2, 842 const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2,
843 -(mode_info_stride << 1) - 2}; 843 -(mode_info_stride << 1) - 2};
844 const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1}; 844 const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1};
845 845
846 // Following variables represent shifts to position the current block 846 // Following variables represent shifts to position the current block
847 // mask over the appropriate block. A shift of 36 to the left will move 847 // mask over the appropriate block. A shift of 36 to the left will move
848 // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left 848 // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left
849 // 4 columns to the appropriate spot. 849 // 4 columns to the appropriate spot.
850 const int shift_32_y[] = {0, 4, 32, 36}; 850 const int shift_32_y[] = {0, 4, 32, 36};
851 const int shift_16_y[] = {0, 2, 16, 18}; 851 const int shift_16_y[] = {0, 2, 16, 18};
852 const int shift_8_y[] = {0, 1, 8, 9}; 852 const int shift_8_y[] = {0, 1, 8, 9};
853 const int shift_32_uv[] = {0, 2, 8, 10}; 853 const int shift_32_uv[] = {0, 2, 8, 10};
854 const int shift_16_uv[] = {0, 1, 4, 5}; 854 const int shift_16_uv[] = {0, 1, 4, 5};
855 int i; 855 int i;
856 const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? 856 const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ?
857 cm->mi_rows - mi_row : MI_BLOCK_SIZE); 857 cm->mi_rows - mi_row : MI_BLOCK_SIZE);
858 const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? 858 const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
859 cm->mi_cols - mi_col : MI_BLOCK_SIZE); 859 cm->mi_cols - mi_col : MI_BLOCK_SIZE);
860 860
861 vp9_zero(*lfm); 861 vp9_zero(*lfm);
862 assert(mip != NULL); 862 assert(mip[0] != NULL);
863 863
864 // TODO(jimbankoski): Try moving most of the following code into decode 864 // TODO(jimbankoski): Try moving most of the following code into decode
865 // loop and storing lfm in the mbmi structure so that we don't have to go 865 // loop and storing lfm in the mbmi structure so that we don't have to go
866 // through the recursive loop structure multiple times. 866 // through the recursive loop structure multiple times.
867 switch (mip->mbmi.sb_type) { 867 switch (mip[0]->mbmi.sb_type) {
868 case BLOCK_64X64: 868 case BLOCK_64X64:
869 build_masks(lfi_n, mip , 0, 0, lfm); 869 build_masks(lfi_n, mip[0] , 0, 0, lfm);
870 break; 870 break;
871 case BLOCK_64X32: 871 case BLOCK_64X32:
872 build_masks(lfi_n, mip, 0, 0, lfm); 872 build_masks(lfi_n, mip[0], 0, 0, lfm);
873 mip2 = mip + mode_info_stride * 4; 873 mip2 = mip + mode_info_stride * 4;
874 if (4 >= max_rows) 874 if (4 >= max_rows)
875 break; 875 break;
876 build_masks(lfi_n, mip2, 32, 8, lfm); 876 build_masks(lfi_n, mip2[0], 32, 8, lfm);
877 break; 877 break;
878 case BLOCK_32X64: 878 case BLOCK_32X64:
879 build_masks(lfi_n, mip, 0, 0, lfm); 879 build_masks(lfi_n, mip[0], 0, 0, lfm);
880 mip2 = mip + 4; 880 mip2 = mip + 4;
881 if (4 >= max_cols) 881 if (4 >= max_cols)
882 break; 882 break;
883 build_masks(lfi_n, mip2, 4, 2, lfm); 883 build_masks(lfi_n, mip2[0], 4, 2, lfm);
884 break; 884 break;
885 default: 885 default:
886 for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { 886 for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
887 const int shift_y = shift_32_y[idx_32]; 887 const int shift_y = shift_32_y[idx_32];
888 const int shift_uv = shift_32_uv[idx_32]; 888 const int shift_uv = shift_32_uv[idx_32];
889 const int mi_32_col_offset = ((idx_32 & 1) << 2); 889 const int mi_32_col_offset = ((idx_32 & 1) << 2);
890 const int mi_32_row_offset = ((idx_32 >> 1) << 2); 890 const int mi_32_row_offset = ((idx_32 >> 1) << 2);
891 if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) 891 if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
892 continue; 892 continue;
893 switch (mip->mbmi.sb_type) { 893 switch (mip[0]->mbmi.sb_type) {
894 case BLOCK_32X32: 894 case BLOCK_32X32:
895 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); 895 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
896 break; 896 break;
897 case BLOCK_32X16: 897 case BLOCK_32X16:
898 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); 898 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
899 if (mi_32_row_offset + 2 >= max_rows) 899 if (mi_32_row_offset + 2 >= max_rows)
900 continue; 900 continue;
901 mip2 = mip + mode_info_stride * 2; 901 mip2 = mip + mode_info_stride * 2;
902 build_masks(lfi_n, mip2, shift_y + 16, shift_uv + 4, lfm); 902 build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
903 break; 903 break;
904 case BLOCK_16X32: 904 case BLOCK_16X32:
905 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); 905 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
906 if (mi_32_col_offset + 2 >= max_cols) 906 if (mi_32_col_offset + 2 >= max_cols)
907 continue; 907 continue;
908 mip2 = mip + 2; 908 mip2 = mip + 2;
909 build_masks(lfi_n, mip2, shift_y + 2, shift_uv + 1, lfm); 909 build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
910 break; 910 break;
911 default: 911 default:
912 for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { 912 for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
913 const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; 913 const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
914 const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; 914 const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
915 const int mi_16_col_offset = mi_32_col_offset + 915 const int mi_16_col_offset = mi_32_col_offset +
916 ((idx_16 & 1) << 1); 916 ((idx_16 & 1) << 1);
917 const int mi_16_row_offset = mi_32_row_offset + 917 const int mi_16_row_offset = mi_32_row_offset +
918 ((idx_16 >> 1) << 1); 918 ((idx_16 >> 1) << 1);
919 919
920 if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) 920 if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
921 continue; 921 continue;
922 922
923 switch (mip->mbmi.sb_type) { 923 switch (mip[0]->mbmi.sb_type) {
924 case BLOCK_16X16: 924 case BLOCK_16X16:
925 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); 925 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
926 break; 926 break;
927 case BLOCK_16X8: 927 case BLOCK_16X8:
928 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); 928 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
929 if (mi_16_row_offset + 1 >= max_rows) 929 if (mi_16_row_offset + 1 >= max_rows)
930 continue; 930 continue;
931 mip2 = mip + mode_info_stride; 931 mip2 = mip + mode_info_stride;
932 build_y_mask(lfi_n, mip2, shift_y+8, lfm); 932 build_y_mask(lfi_n, mip2[0], shift_y+8, lfm);
933 break; 933 break;
934 case BLOCK_8X16: 934 case BLOCK_8X16:
935 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); 935 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
936 if (mi_16_col_offset +1 >= max_cols) 936 if (mi_16_col_offset +1 >= max_cols)
937 continue; 937 continue;
938 mip2 = mip + 1; 938 mip2 = mip + 1;
939 build_y_mask(lfi_n, mip2, shift_y+1, lfm); 939 build_y_mask(lfi_n, mip2[0], shift_y+1, lfm);
940 break; 940 break;
941 default: { 941 default: {
942 const int shift_y = shift_32_y[idx_32] + 942 const int shift_y = shift_32_y[idx_32] +
943 shift_16_y[idx_16] + 943 shift_16_y[idx_16] +
944 shift_8_y[0]; 944 shift_8_y[0];
945 build_masks(lfi_n, mip, shift_y, shift_uv, lfm); 945 build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
946 mip += offset[0]; 946 mip += offset[0];
947 for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { 947 for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
948 const int shift_y = shift_32_y[idx_32] + 948 const int shift_y = shift_32_y[idx_32] +
949 shift_16_y[idx_16] + 949 shift_16_y[idx_16] +
950 shift_8_y[idx_8]; 950 shift_8_y[idx_8];
951 const int mi_8_col_offset = mi_16_col_offset + 951 const int mi_8_col_offset = mi_16_col_offset +
952 ((idx_8 & 1)); 952 ((idx_8 & 1));
953 const int mi_8_row_offset = mi_16_row_offset + 953 const int mi_8_row_offset = mi_16_row_offset +
954 ((idx_8 >> 1)); 954 ((idx_8 >> 1));
955 955
956 if (mi_8_col_offset >= max_cols || 956 if (mi_8_col_offset >= max_cols ||
957 mi_8_row_offset >= max_rows) 957 mi_8_row_offset >= max_rows)
958 continue; 958 continue;
959 build_y_mask(lfi_n, mip, shift_y, lfm); 959 build_y_mask(lfi_n, mip[0], shift_y, lfm);
960 } 960 }
961 break; 961 break;
962 } 962 }
963 } 963 }
964 } 964 }
965 break; 965 break;
966 } 966 }
967 } 967 }
968 break; 968 break;
969 } 969 }
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after
1144 mask_16x16 >>= 1; 1144 mask_16x16 >>= 1;
1145 mask_8x8 >>= 1; 1145 mask_8x8 >>= 1;
1146 mask_4x4 >>= 1; 1146 mask_4x4 >>= 1;
1147 mask_4x4_int >>= 1; 1147 mask_4x4_int >>= 1;
1148 } 1148 }
1149 } 1149 }
1150 #endif // CONFIG_VP9_HIGHBITDEPTH 1150 #endif // CONFIG_VP9_HIGHBITDEPTH
1151 1151
1152 void vp9_filter_block_plane_non420(VP9_COMMON *cm, 1152 void vp9_filter_block_plane_non420(VP9_COMMON *cm,
1153 struct macroblockd_plane *plane, 1153 struct macroblockd_plane *plane,
1154 MODE_INFO *mi_8x8, 1154 MODE_INFO **mi_8x8,
1155 int mi_row, int mi_col) { 1155 int mi_row, int mi_col) {
1156 const int ss_x = plane->subsampling_x; 1156 const int ss_x = plane->subsampling_x;
1157 const int ss_y = plane->subsampling_y; 1157 const int ss_y = plane->subsampling_y;
1158 const int row_step = 1 << ss_y; 1158 const int row_step = 1 << ss_y;
1159 const int col_step = 1 << ss_x; 1159 const int col_step = 1 << ss_x;
1160 const int row_step_stride = cm->mi_stride * row_step; 1160 const int row_step_stride = cm->mi_stride * row_step;
1161 struct buf_2d *const dst = &plane->dst; 1161 struct buf_2d *const dst = &plane->dst;
1162 uint8_t* const dst0 = dst->buf; 1162 uint8_t* const dst0 = dst->buf;
1163 unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; 1163 unsigned int mask_16x16[MI_BLOCK_SIZE] = {0};
1164 unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; 1164 unsigned int mask_8x8[MI_BLOCK_SIZE] = {0};
1165 unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; 1165 unsigned int mask_4x4[MI_BLOCK_SIZE] = {0};
1166 unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; 1166 unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
1167 uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; 1167 uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
1168 int r, c; 1168 int r, c;
1169 1169
1170 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { 1170 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
1171 unsigned int mask_16x16_c = 0; 1171 unsigned int mask_16x16_c = 0;
1172 unsigned int mask_8x8_c = 0; 1172 unsigned int mask_8x8_c = 0;
1173 unsigned int mask_4x4_c = 0; 1173 unsigned int mask_4x4_c = 0;
1174 unsigned int border_mask; 1174 unsigned int border_mask;
1175 1175
1176 // Determine the vertical edges that need filtering 1176 // Determine the vertical edges that need filtering
1177 for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { 1177 for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
1178 const MODE_INFO *mi = mi_8x8[c].src_mi; 1178 const MODE_INFO *mi = mi_8x8[c];
1179 const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; 1179 const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
1180 const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); 1180 const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
1181 // left edge of current unit is block/partition edge -> no skip 1181 // left edge of current unit is block/partition edge -> no skip
1182 const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? 1182 const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
1183 !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; 1183 !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
1184 const int skip_this_c = skip_this && !block_edge_left; 1184 const int skip_this_c = skip_this && !block_edge_left;
1185 // top edge of current unit is block/partition edge -> no skip 1185 // top edge of current unit is block/partition edge -> no skip
1186 const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? 1186 const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
1187 !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; 1187 !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
1188 const int skip_this_r = skip_this && !block_edge_above; 1188 const int skip_this_r = skip_this && !block_edge_above;
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
1319 mask_16x16_r, 1319 mask_16x16_r,
1320 mask_8x8_r, 1320 mask_8x8_r,
1321 mask_4x4_r, 1321 mask_4x4_r,
1322 mask_4x4_int_r, 1322 mask_4x4_int_r,
1323 &cm->lf_info, &lfl[r << 3]); 1323 &cm->lf_info, &lfl[r << 3]);
1324 #endif // CONFIG_VP9_HIGHBITDEPTH 1324 #endif // CONFIG_VP9_HIGHBITDEPTH
1325 dst->buf += 8 * dst->stride; 1325 dst->buf += 8 * dst->stride;
1326 } 1326 }
1327 } 1327 }
1328 1328
1329 void vp9_filter_block_plane(VP9_COMMON *const cm, 1329 void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
1330 struct macroblockd_plane *const plane, 1330 struct macroblockd_plane *const plane,
1331 int mi_row, 1331 int mi_row,
1332 LOOP_FILTER_MASK *lfm) { 1332 LOOP_FILTER_MASK *lfm) {
1333 struct buf_2d *const dst = &plane->dst; 1333 struct buf_2d *const dst = &plane->dst;
1334 uint8_t* const dst0 = dst->buf; 1334 uint8_t *const dst0 = dst->buf;
1335 int r;
1336 uint64_t mask_16x16 = lfm->left_y[TX_16X16];
1337 uint64_t mask_8x8 = lfm->left_y[TX_8X8];
1338 uint64_t mask_4x4 = lfm->left_y[TX_4X4];
1339 uint64_t mask_4x4_int = lfm->int_4x4_y;
1340
1341 assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
1342
1343 // Vertical pass: do 2 rows at one time
1344 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
1345 unsigned int mask_16x16_l = mask_16x16 & 0xffff;
1346 unsigned int mask_8x8_l = mask_8x8 & 0xffff;
1347 unsigned int mask_4x4_l = mask_4x4 & 0xffff;
1348 unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
1349
1350 // Disable filtering on the leftmost column.
1351 #if CONFIG_VP9_HIGHBITDEPTH
1352 if (cm->use_highbitdepth) {
1353 highbd_filter_selectively_vert_row2(
1354 plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
1355 mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
1356 &lfm->lfl_y[r << 3], (int)cm->bit_depth);
1357 } else {
1358 filter_selectively_vert_row2(
1359 plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
1360 mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
1361 }
1362 #else
1363 filter_selectively_vert_row2(
1364 plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
1365 mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
1366 #endif // CONFIG_VP9_HIGHBITDEPTH
1367 dst->buf += 16 * dst->stride;
1368 mask_16x16 >>= 16;
1369 mask_8x8 >>= 16;
1370 mask_4x4 >>= 16;
1371 mask_4x4_int >>= 16;
1372 }
1373
1374 // Horizontal pass
1375 dst->buf = dst0;
1376 mask_16x16 = lfm->above_y[TX_16X16];
1377 mask_8x8 = lfm->above_y[TX_8X8];
1378 mask_4x4 = lfm->above_y[TX_4X4];
1379 mask_4x4_int = lfm->int_4x4_y;
1380
1381 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
1382 unsigned int mask_16x16_r;
1383 unsigned int mask_8x8_r;
1384 unsigned int mask_4x4_r;
1385
1386 if (mi_row + r == 0) {
1387 mask_16x16_r = 0;
1388 mask_8x8_r = 0;
1389 mask_4x4_r = 0;
1390 } else {
1391 mask_16x16_r = mask_16x16 & 0xff;
1392 mask_8x8_r = mask_8x8 & 0xff;
1393 mask_4x4_r = mask_4x4 & 0xff;
1394 }
1395
1396 #if CONFIG_VP9_HIGHBITDEPTH
1397 if (cm->use_highbitdepth) {
1398 highbd_filter_selectively_horiz(
1399 CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
1400 mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
1401 (int)cm->bit_depth);
1402 } else {
1403 filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
1404 mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
1405 &lfm->lfl_y[r << 3]);
1406 }
1407 #else
1408 filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
1409 mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
1410 &lfm->lfl_y[r << 3]);
1411 #endif // CONFIG_VP9_HIGHBITDEPTH
1412
1413 dst->buf += 8 * dst->stride;
1414 mask_16x16 >>= 8;
1415 mask_8x8 >>= 8;
1416 mask_4x4 >>= 8;
1417 mask_4x4_int >>= 8;
1418 }
1419 }
1420
1421 void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
1422 struct macroblockd_plane *const plane,
1423 int mi_row,
1424 LOOP_FILTER_MASK *lfm) {
1425 struct buf_2d *const dst = &plane->dst;
1426 uint8_t *const dst0 = dst->buf;
1335 int r, c; 1427 int r, c;
1336 1428
1337 if (!plane->plane_type) { 1429 uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
1338 uint64_t mask_16x16 = lfm->left_y[TX_16X16]; 1430 uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
1339 uint64_t mask_8x8 = lfm->left_y[TX_8X8]; 1431 uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
1340 uint64_t mask_4x4 = lfm->left_y[TX_4X4]; 1432 uint16_t mask_4x4_int = lfm->int_4x4_uv;
1341 uint64_t mask_4x4_int = lfm->int_4x4_y; 1433
1342 1434 assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
1343 // Vertical pass: do 2 rows at one time 1435
1344 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { 1436 // Vertical pass: do 2 rows at one time
1345 unsigned int mask_16x16_l = mask_16x16 & 0xffff; 1437 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
1346 unsigned int mask_8x8_l = mask_8x8 & 0xffff; 1438 if (plane->plane_type == 1) {
1347 unsigned int mask_4x4_l = mask_4x4 & 0xffff; 1439 for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
1348 unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; 1440 lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
1349 1441 lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
1350 // Disable filtering on the leftmost column. 1442 }
1443 }
1444
1445 {
1446 unsigned int mask_16x16_l = mask_16x16 & 0xff;
1447 unsigned int mask_8x8_l = mask_8x8 & 0xff;
1448 unsigned int mask_4x4_l = mask_4x4 & 0xff;
1449 unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
1450
1451 // Disable filtering on the leftmost column.
1351 #if CONFIG_VP9_HIGHBITDEPTH 1452 #if CONFIG_VP9_HIGHBITDEPTH
1352 if (cm->use_highbitdepth) { 1453 if (cm->use_highbitdepth) {
1353 highbd_filter_selectively_vert_row2(plane->plane_type, 1454 highbd_filter_selectively_vert_row2(
1354 CONVERT_TO_SHORTPTR(dst->buf), 1455 plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
1355 dst->stride, 1456 mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
1356 mask_16x16_l, 1457 &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
1357 mask_8x8_l,
1358 mask_4x4_l,
1359 mask_4x4_int_l,
1360 &cm->lf_info, &lfm->lfl_y[r << 3],
1361 (int)cm->bit_depth);
1362 } else { 1458 } else {
1363 filter_selectively_vert_row2(plane->plane_type, 1459 filter_selectively_vert_row2(
1364 dst->buf, dst->stride, 1460 plane->subsampling_x, dst->buf, dst->stride,
1365 mask_16x16_l, 1461 mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
1366 mask_8x8_l, 1462 &lfm->lfl_uv[r << 1]);
1367 mask_4x4_l,
1368 mask_4x4_int_l,
1369 &cm->lf_info,
1370 &lfm->lfl_y[r << 3]);
1371 } 1463 }
1372 #else 1464 #else
1373 filter_selectively_vert_row2(plane->plane_type, 1465 filter_selectively_vert_row2(
1374 dst->buf, dst->stride, 1466 plane->subsampling_x, dst->buf, dst->stride,
1375 mask_16x16_l, 1467 mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
1376 mask_8x8_l, 1468 &lfm->lfl_uv[r << 1]);
1377 mask_4x4_l, 1469 #endif // CONFIG_VP9_HIGHBITDEPTH
1378 mask_4x4_int_l, 1470
1379 &cm->lf_info, &lfm->lfl_y[r << 3]);
1380 #endif // CONFIG_VP9_HIGHBITDEPTH
1381 dst->buf += 16 * dst->stride; 1471 dst->buf += 16 * dst->stride;
1382 mask_16x16 >>= 16;
1383 mask_8x8 >>= 16;
1384 mask_4x4 >>= 16;
1385 mask_4x4_int >>= 16;
1386 }
1387
1388 // Horizontal pass
1389 dst->buf = dst0;
1390 mask_16x16 = lfm->above_y[TX_16X16];
1391 mask_8x8 = lfm->above_y[TX_8X8];
1392 mask_4x4 = lfm->above_y[TX_4X4];
1393 mask_4x4_int = lfm->int_4x4_y;
1394
1395 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
1396 unsigned int mask_16x16_r;
1397 unsigned int mask_8x8_r;
1398 unsigned int mask_4x4_r;
1399
1400 if (mi_row + r == 0) {
1401 mask_16x16_r = 0;
1402 mask_8x8_r = 0;
1403 mask_4x4_r = 0;
1404 } else {
1405 mask_16x16_r = mask_16x16 & 0xff;
1406 mask_8x8_r = mask_8x8 & 0xff;
1407 mask_4x4_r = mask_4x4 & 0xff;
1408 }
1409
1410 #if CONFIG_VP9_HIGHBITDEPTH
1411 if (cm->use_highbitdepth) {
1412 highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
1413 dst->stride,
1414 mask_16x16_r,
1415 mask_8x8_r,
1416 mask_4x4_r,
1417 mask_4x4_int & 0xff,
1418 &cm->lf_info,
1419 &lfm->lfl_y[r << 3],
1420 (int)cm->bit_depth);
1421 } else {
1422 filter_selectively_horiz(dst->buf, dst->stride,
1423 mask_16x16_r,
1424 mask_8x8_r,
1425 mask_4x4_r,
1426 mask_4x4_int & 0xff,
1427 &cm->lf_info,
1428 &lfm->lfl_y[r << 3]);
1429 }
1430 #else
1431 filter_selectively_horiz(dst->buf, dst->stride,
1432 mask_16x16_r,
1433 mask_8x8_r,
1434 mask_4x4_r,
1435 mask_4x4_int & 0xff,
1436 &cm->lf_info,
1437 &lfm->lfl_y[r << 3]);
1438 #endif // CONFIG_VP9_HIGHBITDEPTH
1439
1440 dst->buf += 8 * dst->stride;
1441 mask_16x16 >>= 8; 1472 mask_16x16 >>= 8;
1442 mask_8x8 >>= 8; 1473 mask_8x8 >>= 8;
1443 mask_4x4 >>= 8; 1474 mask_4x4 >>= 8;
1444 mask_4x4_int >>= 8; 1475 mask_4x4_int >>= 8;
1445 } 1476 }
1446 } else { 1477 }
1447 uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; 1478
1448 uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; 1479 // Horizontal pass
1449 uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; 1480 dst->buf = dst0;
1450 uint16_t mask_4x4_int = lfm->int_4x4_uv; 1481 mask_16x16 = lfm->above_uv[TX_16X16];
1451 1482 mask_8x8 = lfm->above_uv[TX_8X8];
1452 // Vertical pass: do 2 rows at one time 1483 mask_4x4 = lfm->above_uv[TX_4X4];
1453 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { 1484 mask_4x4_int = lfm->int_4x4_uv;
1454 if (plane->plane_type == 1) { 1485
1455 for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { 1486 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
1456 lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; 1487 const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
1457 lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + 1488 const unsigned int mask_4x4_int_r =
1458 (c << 1)]; 1489 skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
1459 } 1490 unsigned int mask_16x16_r;
1460 } 1491 unsigned int mask_8x8_r;
1461 1492 unsigned int mask_4x4_r;
1462 { 1493
1463 unsigned int mask_16x16_l = mask_16x16 & 0xff; 1494 if (mi_row + r == 0) {
1464 unsigned int mask_8x8_l = mask_8x8 & 0xff; 1495 mask_16x16_r = 0;
1465 unsigned int mask_4x4_l = mask_4x4 & 0xff; 1496 mask_8x8_r = 0;
1466 unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; 1497 mask_4x4_r = 0;
1467 1498 } else {
1468 // Disable filtering on the leftmost column. 1499 mask_16x16_r = mask_16x16 & 0xf;
1469 #if CONFIG_VP9_HIGHBITDEPTH 1500 mask_8x8_r = mask_8x8 & 0xf;
1470 if (cm->use_highbitdepth) { 1501 mask_4x4_r = mask_4x4 & 0xf;
1471 highbd_filter_selectively_vert_row2(plane->plane_type, 1502 }
1472 CONVERT_TO_SHORTPTR(dst->buf), 1503
1473 dst->stride, 1504 #if CONFIG_VP9_HIGHBITDEPTH
1474 mask_16x16_l, 1505 if (cm->use_highbitdepth) {
1475 mask_8x8_l, 1506 highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
1476 mask_4x4_l, 1507 dst->stride, mask_16x16_r, mask_8x8_r,
1477 mask_4x4_int_l, 1508 mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
1478 &cm->lf_info, 1509 &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
1479 &lfm->lfl_uv[r << 1], 1510 } else {
1480 (int)cm->bit_depth); 1511 filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
1481 } else { 1512 mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
1482 filter_selectively_vert_row2(plane->plane_type,
1483 dst->buf, dst->stride,
1484 mask_16x16_l,
1485 mask_8x8_l,
1486 mask_4x4_l,
1487 mask_4x4_int_l,
1488 &cm->lf_info,
1489 &lfm->lfl_uv[r << 1]);
1490 }
1491 #else
1492 filter_selectively_vert_row2(plane->plane_type,
1493 dst->buf, dst->stride,
1494 mask_16x16_l,
1495 mask_8x8_l,
1496 mask_4x4_l,
1497 mask_4x4_int_l,
1498 &cm->lf_info,
1499 &lfm->lfl_uv[r << 1]);
1500 #endif // CONFIG_VP9_HIGHBITDEPTH
1501
1502 dst->buf += 16 * dst->stride;
1503 mask_16x16 >>= 8;
1504 mask_8x8 >>= 8;
1505 mask_4x4 >>= 8;
1506 mask_4x4_int >>= 8;
1507 }
1508 }
1509
1510 // Horizontal pass
1511 dst->buf = dst0;
1512 mask_16x16 = lfm->above_uv[TX_16X16];
1513 mask_8x8 = lfm->above_uv[TX_8X8];
1514 mask_4x4 = lfm->above_uv[TX_4X4];
1515 mask_4x4_int = lfm->int_4x4_uv;
1516
1517 for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
1518 const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
1519 const unsigned int mask_4x4_int_r = skip_border_4x4_r ?
1520 0 : (mask_4x4_int & 0xf);
1521 unsigned int mask_16x16_r;
1522 unsigned int mask_8x8_r;
1523 unsigned int mask_4x4_r;
1524
1525 if (mi_row + r == 0) {
1526 mask_16x16_r = 0;
1527 mask_8x8_r = 0;
1528 mask_4x4_r = 0;
1529 } else {
1530 mask_16x16_r = mask_16x16 & 0xf;
1531 mask_8x8_r = mask_8x8 & 0xf;
1532 mask_4x4_r = mask_4x4 & 0xf;
1533 }
1534
1535 #if CONFIG_VP9_HIGHBITDEPTH
1536 if (cm->use_highbitdepth) {
1537 highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
1538 dst->stride,
1539 mask_16x16_r,
1540 mask_8x8_r,
1541 mask_4x4_r,
1542 mask_4x4_int_r,
1543 &cm->lf_info,
1544 &lfm->lfl_uv[r << 1],
1545 (int)cm->bit_depth);
1546 } else {
1547 filter_selectively_horiz(dst->buf, dst->stride,
1548 mask_16x16_r,
1549 mask_8x8_r,
1550 mask_4x4_r,
1551 mask_4x4_int_r,
1552 &cm->lf_info,
1553 &lfm->lfl_uv[r << 1]);
1554 }
1555 #else
1556 filter_selectively_horiz(dst->buf, dst->stride,
1557 mask_16x16_r,
1558 mask_8x8_r,
1559 mask_4x4_r,
1560 mask_4x4_int_r,
1561 &cm->lf_info,
1562 &lfm->lfl_uv[r << 1]); 1513 &lfm->lfl_uv[r << 1]);
1563 #endif // CONFIG_VP9_HIGHBITDEPTH 1514 }
1564 1515 #else
1565 dst->buf += 8 * dst->stride; 1516 filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
1566 mask_16x16 >>= 4; 1517 mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
1567 mask_8x8 >>= 4; 1518 &lfm->lfl_uv[r << 1]);
1568 mask_4x4 >>= 4; 1519 #endif // CONFIG_VP9_HIGHBITDEPTH
1569 mask_4x4_int >>= 4; 1520
1570 } 1521 dst->buf += 8 * dst->stride;
1522 mask_16x16 >>= 4;
1523 mask_8x8 >>= 4;
1524 mask_4x4 >>= 4;
1525 mask_4x4_int >>= 4;
1571 } 1526 }
1572 } 1527 }
1573 1528
1574 void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, 1529 void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
1575 VP9_COMMON *cm, 1530 VP9_COMMON *cm,
1576 struct macroblockd_plane planes[MAX_MB_PLANE], 1531 struct macroblockd_plane planes[MAX_MB_PLANE],
1577 int start, int stop, int y_only) { 1532 int start, int stop, int y_only) {
1578 const int num_planes = y_only ? 1 : MAX_MB_PLANE; 1533 const int num_planes = y_only ? 1 : MAX_MB_PLANE;
1579 const int use_420 = y_only || (planes[1].subsampling_y == 1 && 1534 enum lf_path path;
1580 planes[1].subsampling_x == 1);
1581 LOOP_FILTER_MASK lfm; 1535 LOOP_FILTER_MASK lfm;
1582 int mi_row, mi_col; 1536 int mi_row, mi_col;
1583 1537
1538 if (y_only)
1539 path = LF_PATH_444;
1540 else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
1541 path = LF_PATH_420;
1542 else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
1543 path = LF_PATH_444;
1544 else
1545 path = LF_PATH_SLOW;
1546
1584 for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { 1547 for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
1585 MODE_INFO *mi = cm->mi + mi_row * cm->mi_stride; 1548 MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
1586 1549
1587 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { 1550 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
1588 int plane; 1551 int plane;
1589 1552
1590 vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); 1553 vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
1591 1554
1592 // TODO(JBB): Make setup_mask work for non 420. 1555 // TODO(JBB): Make setup_mask work for non 420.
1593 if (use_420) 1556 vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
1594 vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, 1557 &lfm);
1595 &lfm);
1596 1558
1597 for (plane = 0; plane < num_planes; ++plane) { 1559 vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
1598 if (use_420) 1560 for (plane = 1; plane < num_planes; ++plane) {
1599 vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); 1561 switch (path) {
1600 else 1562 case LF_PATH_420:
1601 vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, 1563 vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
1602 mi_row, mi_col); 1564 break;
1565 case LF_PATH_444:
1566 vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
1567 break;
1568 case LF_PATH_SLOW:
1569 vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
1570 mi_row, mi_col);
1571 break;
1572 }
1603 } 1573 }
1604 } 1574 }
1605 } 1575 }
1606 } 1576 }
1607 1577
1608 void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, 1578 void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
1609 VP9_COMMON *cm, MACROBLOCKD *xd, 1579 VP9_COMMON *cm, MACROBLOCKD *xd,
1610 int frame_filter_level, 1580 int frame_filter_level,
1611 int y_only, int partial_frame) { 1581 int y_only, int partial_frame) {
1612 int start_mi_row, end_mi_row, mi_rows_to_filter; 1582 int start_mi_row, end_mi_row, mi_rows_to_filter;
(...skipping 13 matching lines...) Expand all
1626 } 1596 }
1627 1597
1628 void vp9_loop_filter_data_reset( 1598 void vp9_loop_filter_data_reset(
1629 LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, 1599 LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
1630 struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { 1600 struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) {
1631 lf_data->frame_buffer = frame_buffer; 1601 lf_data->frame_buffer = frame_buffer;
1632 lf_data->cm = cm; 1602 lf_data->cm = cm;
1633 lf_data->start = 0; 1603 lf_data->start = 0;
1634 lf_data->stop = 0; 1604 lf_data->stop = 0;
1635 lf_data->y_only = 0; 1605 lf_data->y_only = 0;
1636 vpx_memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); 1606 memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
1637 } 1607 }
1638 1608
1639 int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { 1609 int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
1640 (void)unused; 1610 (void)unused;
1641 vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, 1611 vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
1642 lf_data->start, lf_data->stop, lf_data->y_only); 1612 lf_data->start, lf_data->stop, lf_data->y_only);
1643 return 1; 1613 return 1;
1644 } 1614 }
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/vp9_loopfilter.h ('k') | source/libvpx/vp9/common/vp9_mfqe.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698