source/libvpx/vp9/common/vp9_thread_common.c - Issue 1124333011: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_thread_common.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: only update to last nights LKGR Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "./vpx_config.h"	11 #include "./vpx_config.h"

12 #include "vpx_mem/vpx_mem.h"	12 #include "vpx_mem/vpx_mem.h"

13 #include "vp9/common/vp9_entropymode.h"	13 #include "vp9/common/vp9_entropymode.h"

14 #include "vp9/common/vp9_thread_common.h"	14 #include "vp9/common/vp9_thread_common.h"

15 #include "vp9/common/vp9_reconinter.h"	15 #include "vp9/common/vp9_reconinter.h"

	16 #include "vp9/common/vp9_loopfilter.h"

16	17

17 #if CONFIG_MULTITHREAD	18 #if CONFIG_MULTITHREAD

18 static INLINE void mutex_lock(pthread_mutex_t *const mutex) {	19 static INLINE void mutex_lock(pthread_mutex_t *const mutex) {

19 const int kMaxTryLocks = 4000;	20 const int kMaxTryLocks = 4000;

20 int locked = 0;	21 int locked = 0;

21 int i;	22 int i;

22	23

23 for (i = 0; i < kMaxTryLocks; ++i) {	24 for (i = 0; i < kMaxTryLocks; ++i) {

24 if (!pthread_mutex_trylock(mutex)) {	25 if (!pthread_mutex_trylock(mutex)) {

25 locked = 1;	26 locked = 1;

(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
85 }	86 }

86	87

87 // Implement row loopfiltering for each thread.	88 // Implement row loopfiltering for each thread.

88 static INLINE	89 static INLINE

89 void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,	90 void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,

90 VP9_COMMON *const cm,	91 VP9_COMMON *const cm,

91 struct macroblockd_plane planes[MAX_MB_PLANE],	92 struct macroblockd_plane planes[MAX_MB_PLANE],

92 int start, int stop, int y_only,	93 int start, int stop, int y_only,

93 VP9LfSync *const lf_sync) {	94 VP9LfSync *const lf_sync) {

94 const int num_planes = y_only ? 1 : MAX_MB_PLANE;	95 const int num_planes = y_only ? 1 : MAX_MB_PLANE;

95 const int use_420 = y_only || (planes[1].subsampling_y == 1 &&

96 planes[1].subsampling_x == 1);

97 const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;	96 const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;

98 int mi_row, mi_col;	97 int mi_row, mi_col;

	98 enum lf_path path;

	99 if (y_only)

	100 path = LF_PATH_444;

	101 else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)

	102 path = LF_PATH_420;

	103 else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)

	104 path = LF_PATH_444;

	105 else

	106 path = LF_PATH_SLOW;

99	107

100 for (mi_row = start; mi_row < stop;	108 for (mi_row = start; mi_row < stop;

101 mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {	109 mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {

102 MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride;	110 MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;

103	111

104 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {	112 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {

105 const int r = mi_row >> MI_BLOCK_SIZE_LOG2;	113 const int r = mi_row >> MI_BLOCK_SIZE_LOG2;

106 const int c = mi_col >> MI_BLOCK_SIZE_LOG2;	114 const int c = mi_col >> MI_BLOCK_SIZE_LOG2;

107 LOOP_FILTER_MASK lfm;	115 LOOP_FILTER_MASK lfm;

108 int plane;	116 int plane;

109	117

110 sync_read(lf_sync, r, c);	118 sync_read(lf_sync, r, c);

111	119

112 vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);	120 vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);

113	121

114 // TODO(JBB): Make setup_mask work for non 420.	122 // TODO(JBB): Make setup_mask work for non 420.

115 if (use_420)	123 vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,

116 vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,	124 &lfm);

117 &lfm);

118	125

119 for (plane = 0; plane < num_planes; ++plane) {	126 vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);

120 if (use_420)	127 for (plane = 1; plane < num_planes; ++plane) {

121 vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);	128 switch (path) {

122 else	129 case LF_PATH_420:

123 vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,	130 vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);

124 mi_row, mi_col);	131 break;

	132 case LF_PATH_444:

	133 vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);

	134 break;

	135 case LF_PATH_SLOW:

	136 vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,

	137 mi_row, mi_col);

	138 break;

	139 }

125 }	140 }

126	141

127 sync_write(lf_sync, r, c, sb_cols);	142 sync_write(lf_sync, r, c, sb_cols);

128 }	143 }

129 }	144 }

130 }	145 }

131	146

132 // Row-based multi-threaded loopfilter hook	147 // Row-based multi-threaded loopfilter hook

133 static int loop_filter_row_worker(VP9LfSync *const lf_sync,	148 static int loop_filter_row_worker(VP9LfSync *const lf_sync,

134 LFWorkerData *const lf_data) {	149 LFWorkerData *const lf_data) {

(...skipping 18 matching lines...) Expand all Loading...
153 const int num_workers = MIN(nworkers, tile_cols);	168 const int num_workers = MIN(nworkers, tile_cols);

154 int i;	169 int i;

155	170

156 if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||	171 if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||

157 num_workers > lf_sync->num_workers) {	172 num_workers > lf_sync->num_workers) {

158 vp9_loop_filter_dealloc(lf_sync);	173 vp9_loop_filter_dealloc(lf_sync);

159 vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);	174 vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);

160 }	175 }

161	176

162 // Initialize cur_sb_col to -1 for all SB rows.	177 // Initialize cur_sb_col to -1 for all SB rows.

163 vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);	178 memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

164	179

165 // Set up loopfilter thread data.	180 // Set up loopfilter thread data.

166 // The decoder is capping num_workers because it has been observed that using	181 // The decoder is capping num_workers because it has been observed that using

167 // more threads on the loopfilter than there are cores will hurt performance	182 // more threads on the loopfilter than there are cores will hurt performance

168 // on Android. This is because the system will only schedule the tile decode	183 // on Android. This is because the system will only schedule the tile decode

169 // workers on cores equal to the number of tile columns. Then if the decoder	184 // workers on cores equal to the number of tile columns. Then if the decoder

170 // tries to use more threads for the loopfilter, it will hurt performance	185 // tries to use more threads for the loopfilter, it will hurt performance

171 // because of contention. If the multithreading code changes in the future	186 // because of contention. If the multithreading code changes in the future

172 // then the number of workers used by the loopfilter should be revisited.	187 // then the number of workers used by the loopfilter should be revisited.

173 for (i = 0; i < num_workers; ++i) {	188 for (i = 0; i < num_workers; ++i) {

(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
412 }	427 }

413	428

414 for (i = 0; i < MV_OFFSET_BITS; i++)	429 for (i = 0; i < MV_OFFSET_BITS; i++)

415 for (j = 0; j < 2; j++)	430 for (j = 0; j < 2; j++)

416 comps->bits[i][j] += comps_t->bits[i][j];	431 comps->bits[i][j] += comps_t->bits[i][j];

417	432

418 for (i = 0; i < MV_FP_SIZE; i++)	433 for (i = 0; i < MV_FP_SIZE; i++)

419 comps->fp[i] += comps_t->fp[i];	434 comps->fp[i] += comps_t->fp[i];

420 }	435 }

421 }	436 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_rtcd_defs.pl ('k') | source/libvpx/vp9/common/x86/vp9_asm_stubs.c » ('j') | no next file with comments »