source/libvpx/vp9/decoder/vp9_dthread.c - Issue 232133009: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/decoder/vp9_dthread.c

Issue 232133009: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "./vpx_config.h"	11 #include "./vpx_config.h"

	12

	13 #include "vpx_mem/vpx_mem.h"

	14

12 #include "vp9/common/vp9_reconinter.h"	15 #include "vp9/common/vp9_reconinter.h"

	16

13 #include "vp9/decoder/vp9_dthread.h"	17 #include "vp9/decoder/vp9_dthread.h"

14 #include "vp9/decoder/vp9_onyxd_int.h"	18 #include "vp9/decoder/vp9_decoder.h"

15 #include "vpx_mem/vpx_mem.h"

16	19

17 #if CONFIG_MULTITHREAD	20 #if CONFIG_MULTITHREAD

18 static INLINE void mutex_lock(pthread_mutex_t *const mutex) {	21 static INLINE void mutex_lock(pthread_mutex_t *const mutex) {

19 const int kMaxTryLocks = 4000;	22 const int kMaxTryLocks = 4000;

20 int locked = 0;	23 int locked = 0;

21 int i;	24 int i;

22	25

23 for (i = 0; i < kMaxTryLocks; ++i) {	26 for (i = 0; i < kMaxTryLocks; ++i) {

24 if (!pthread_mutex_trylock(mutex)) {	27 if (!pthread_mutex_trylock(mutex)) {

25 locked = 1;	28 locked = 1;

(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
89 VP9_COMMON *const cm, MACROBLOCKD *const xd,	92 VP9_COMMON *const cm, MACROBLOCKD *const xd,

90 int start, int stop, int y_only,	93 int start, int stop, int y_only,

91 VP9LfSync *const lf_sync, int num_lf_workers) {	94 VP9LfSync *const lf_sync, int num_lf_workers) {

92 const int num_planes = y_only ? 1 : MAX_MB_PLANE;	95 const int num_planes = y_only ? 1 : MAX_MB_PLANE;

93 int r, c; // SB row and col	96 int r, c; // SB row and col

94 LOOP_FILTER_MASK lfm;	97 LOOP_FILTER_MASK lfm;

95 const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;	98 const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;

96	99

97 for (r = start; r < stop; r += num_lf_workers) {	100 for (r = start; r < stop; r += num_lf_workers) {

98 const int mi_row = r << MI_BLOCK_SIZE_LOG2;	101 const int mi_row = r << MI_BLOCK_SIZE_LOG2;

99 MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;	102 MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride;

100	103

101 for (c = 0; c < sb_cols; ++c) {	104 for (c = 0; c < sb_cols; ++c) {

102 const int mi_col = c << MI_BLOCK_SIZE_LOG2;	105 const int mi_col = c << MI_BLOCK_SIZE_LOG2;

103 int plane;	106 int plane;

104	107

105 sync_read(lf_sync, r, c);	108 sync_read(lf_sync, r, c);

106	109

107 setup_dst_planes(xd, frame_buffer, mi_row, mi_col);	110 vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);

108 vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,	111 vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm);

109 &lfm);

110	112

111 for (plane = 0; plane < num_planes; ++plane) {	113 for (plane = 0; plane < num_planes; ++plane) {

112 vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);	114 vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);

113 }	115 }

114	116

115 sync_write(lf_sync, r, c, sb_cols);	117 sync_write(lf_sync, r, c, sb_cols);

116 }	118 }

117 }	119 }

118 }	120 }

119	121

120 // Row-based multi-threaded loopfilter hook	122 // Row-based multi-threaded loopfilter hook

121 static int loop_filter_row_worker(void *arg1, void *arg2) {	123 static int loop_filter_row_worker(void *arg1, void *arg2) {

122 TileWorkerData *const tile_data = (TileWorkerData*)arg1;	124 TileWorkerData *const tile_data = (TileWorkerData*)arg1;

123 LFWorkerData *const lf_data = &tile_data->lfdata;	125 LFWorkerData *const lf_data = &tile_data->lfdata;

124	126

125 loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,	127 loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,

126 lf_data->start, lf_data->stop, lf_data->y_only,	128 lf_data->start, lf_data->stop, lf_data->y_only,

127 lf_data->lf_sync, lf_data->num_lf_workers);	129 lf_data->lf_sync, lf_data->num_lf_workers);

128 return 1;	130 return 1;

129 }	131 }

130	132

131 // VP9 decoder: Implement multi-threaded loopfilter that uses the tile	133 // VP9 decoder: Implement multi-threaded loopfilter that uses the tile

132 // threads.	134 // threads.

133 void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,	135 void vp9_loop_filter_frame_mt(VP9Decoder *pbi,

134 VP9_COMMON *cm,	136 VP9_COMMON *cm,

135 MACROBLOCKD *xd,	137 MACROBLOCKD *xd,

136 int frame_filter_level,	138 int frame_filter_level,

137 int y_only, int partial_frame) {	139 int y_only, int partial_frame) {

138 // Number of superblock rows and cols	140 // Number of superblock rows and cols

139 const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;	141 const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;

	142 const int tile_cols = 1 << cm->log2_tile_cols;

	143 const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);

140 int i;	144 int i;

141	145

142 // Allocate memory used in thread synchronization.	146 // Allocate memory used in thread synchronization.

143 // This always needs to be done even if frame_filter_level is 0.	147 // This always needs to be done even if frame_filter_level is 0.

144 if (!cm->current_video_frame || cm->last_height != cm->height) {	148 if (!cm->current_video_frame || cm->last_height != cm->height) {

145 VP9LfSync *const lf_sync = &pbi->lf_row_sync;	149 VP9LfSync *const lf_sync = &pbi->lf_row_sync;

146	150

147 if (cm->last_height != cm->height) {	151 if (cm->last_height != cm->height) {

148 const int aligned_last_height =	152 const int aligned_last_height =

149 ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);	153 ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);

150 const int last_sb_rows =	154 const int last_sb_rows =

151 mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >>	155 mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >>

152 MI_BLOCK_SIZE_LOG2;	156 MI_BLOCK_SIZE_LOG2;

153	157

154 vp9_loop_filter_dealloc(lf_sync, last_sb_rows);	158 vp9_loop_filter_dealloc(lf_sync, last_sb_rows);

155 }	159 }

156	160

157 vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width);	161 vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width);

158 }	162 }

159	163

160 if (!frame_filter_level) return;	164 if (!frame_filter_level) return;

161	165

162 vp9_loop_filter_frame_init(cm, frame_filter_level);	166 vp9_loop_filter_frame_init(cm, frame_filter_level);

163	167

164 // Initialize cur_sb_col to -1 for all SB rows.	168 // Initialize cur_sb_col to -1 for all SB rows.

165 vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,	169 vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,

166 sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);	170 sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);

167	171

168 // Set up loopfilter thread data.	172 // Set up loopfilter thread data.

169 for (i = 0; i < pbi->num_tile_workers; ++i) {	173 // The decoder is using num_workers instead of pbi->num_tile_workers

	174 // because it has been observed that using more threads on the

	175 // loopfilter, than there are tile columns in the frame will hurt

	176 // performance on Android. This is because the system will only

	177 // schedule the tile decode workers on cores equal to the number

	178 // of tile columns. Then if the decoder tries to use more threads for the

	179 // loopfilter, it will hurt performance because of contention. If the

	180 // multithreading code changes in the future then the number of workers

	181 // used by the loopfilter should be revisited.

	182 for (i = 0; i < num_workers; ++i) {

170 VP9Worker *const worker = &pbi->tile_workers[i];	183 VP9Worker *const worker = &pbi->tile_workers[i];

171 TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;	184 TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;

172 LFWorkerData *const lf_data = &tile_data->lfdata;	185 LFWorkerData *const lf_data = &tile_data->lfdata;

173	186

174 worker->hook = (VP9WorkerHook)loop_filter_row_worker;	187 worker->hook = (VP9WorkerHook)loop_filter_row_worker;

175	188

176 // Loopfilter data	189 // Loopfilter data

177 lf_data->frame_buffer = get_frame_new_buffer(cm);	190 lf_data->frame_buffer = get_frame_new_buffer(cm);

178 lf_data->cm = cm;	191 lf_data->cm = cm;

179 lf_data->xd = pbi->mb;	192 lf_data->xd = pbi->mb;

180 lf_data->start = i;	193 lf_data->start = i;

181 lf_data->stop = sb_rows;	194 lf_data->stop = sb_rows;

182 lf_data->y_only = y_only; // always do all planes in decoder	195 lf_data->y_only = y_only; // always do all planes in decoder

183	196

184 lf_data->lf_sync = &pbi->lf_row_sync;	197 lf_data->lf_sync = &pbi->lf_row_sync;

185 lf_data->num_lf_workers = pbi->num_tile_workers;	198 lf_data->num_lf_workers = num_workers;

186	199

187 // Start loopfiltering	200 // Start loopfiltering

188 if (i == pbi->num_tile_workers - 1) {	201 if (i == num_workers - 1) {

189 vp9_worker_execute(worker);	202 vp9_worker_execute(worker);

190 } else {	203 } else {

191 vp9_worker_launch(worker);	204 vp9_worker_launch(worker);

192 }	205 }

193 }	206 }

194	207

195 // Wait till all rows are finished	208 // Wait till all rows are finished

196 for (i = 0; i < pbi->num_tile_workers; ++i) {	209 for (i = 0; i < num_workers; ++i) {

197 vp9_worker_sync(&pbi->tile_workers[i]);	210 vp9_worker_sync(&pbi->tile_workers[i]);

198 }	211 }

199 }	212 }

200	213

201 // Set up nsync by width.	214 // Set up nsync by width.

202 static int get_sync_range(int width) {	215 static int get_sync_range(int width) {

203 // nsync numbers are picked by testing. For example, for 4k	216 // nsync numbers are picked by testing. For example, for 4k

204 // video, using 4 gives best performance.	217 // video, using 4 gives best performance.

205 if (width < 640)	218 if (width < 640)

206 return 1;	219 return 1;

(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
263 vpx_memset(lf_sync, 0, sizeof(*lf_sync));	276 vpx_memset(lf_sync, 0, sizeof(*lf_sync));

264 }	277 }

265 #else	278 #else

266 (void)rows;	279 (void)rows;

267 if (lf_sync != NULL) {	280 if (lf_sync != NULL) {

268 vpx_free(lf_sync->cur_sb_col);	281 vpx_free(lf_sync->cur_sb_col);

269 vpx_memset(lf_sync, 0, sizeof(*lf_sync));	282 vpx_memset(lf_sync, 0, sizeof(*lf_sync));

270 }	283 }

271 #endif // CONFIG_MULTITHREAD	284 #endif // CONFIG_MULTITHREAD

272 }	285 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/decoder/vp9_dthread.h ('k') | source/libvpx/vp9/decoder/vp9_onyxd.h » ('j') | no next file with comments »