source/libvpx/vp9/decoder/vp9_dthread.c - Issue 756673003: libvpx: Pull from upstream

Unified Diff: source/libvpx/vp9/decoder/vp9_dthread.c

Issue 756673003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/vp9/decoder/vp9_dthread.c

===================================================================

--- source/libvpx/vp9/decoder/vp9_dthread.c (revision 293081)

+++ source/libvpx/vp9/decoder/vp9_dthread.c (working copy)

@@ -92,12 +92,12 @@

VP9_COMMON *const cm,

struct macroblockd_plane planes[MAX_MB_PLANE],

int start, int stop, int y_only,

- VP9LfSync *const lf_sync, int num_lf_workers) {

+ VP9LfSync *const lf_sync) {

const int num_planes = y_only ? 1 : MAX_MB_PLANE;

int r, c; // SB row and col

const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;

- for (r = start; r < stop; r += num_lf_workers) {

+ for (r = start; r < stop; r += lf_sync->num_workers) {

const int mi_row = r << MI_BLOCK_SIZE_LOG2;

MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride;

@@ -121,35 +121,35 @@

}

// Row-based multi-threaded loopfilter hook

-static int loop_filter_row_worker(TileWorkerData *const tile_data,

- void *unused) {

- LFWorkerData *const lf_data = &tile_data->lfdata;

- (void)unused;

+static int loop_filter_row_worker(VP9LfSync *const lf_sync,

+ LFWorkerData *const lf_data) {

loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,

- lf_data->start, lf_data->stop, lf_data->y_only,

- lf_data->lf_sync, lf_data->num_lf_workers);

+ lf_data->start, lf_data->stop, lf_data->y_only, lf_sync);

return 1;

}

// VP9 decoder: Implement multi-threaded loopfilter that uses the tile

// threads.

-void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,

- VP9Decoder *pbi, VP9_COMMON *cm,

+void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync,

+ YV12_BUFFER_CONFIG *frame,

+ struct macroblockd_plane planes[MAX_MB_PLANE],

+ VP9_COMMON *cm,

+ VP9Worker *workers, int nworkers,

int frame_filter_level,

int y_only) {

- VP9LfSync *const lf_sync = &pbi->lf_row_sync;

const VP9WorkerInterface *const winterface = vp9_get_worker_interface();

// Number of superblock rows and cols

const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;

const int tile_cols = 1 << cm->log2_tile_cols;

- const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);

+ const int num_workers = MIN(nworkers, tile_cols);

int i;

if (!frame_filter_level) return;

- if (!lf_sync->sync_range || cm->last_height != cm->height) {

+ if (!lf_sync->sync_range || cm->last_height != cm->height ||

+ num_workers > lf_sync->num_workers) {

vp9_loop_filter_dealloc(lf_sync);

- vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width);

+ vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);

}

vp9_loop_filter_frame_init(cm, frame_filter_level);

@@ -158,33 +158,27 @@

vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

// Set up loopfilter thread data.

- // The decoder is using num_workers instead of pbi->num_tile_workers

- // because it has been observed that using more threads on the

- // loopfilter, than there are tile columns in the frame will hurt

- // performance on Android. This is because the system will only

- // schedule the tile decode workers on cores equal to the number

- // of tile columns. Then if the decoder tries to use more threads for the

- // loopfilter, it will hurt performance because of contention. If the

- // multithreading code changes in the future then the number of workers

- // used by the loopfilter should be revisited.

+ // The decoder caps num_workers at the number of tile columns because it has

+ // been observed that using more loopfilter threads than that hurts

+ // performance on Android. The system only schedules the tile decode workers

+ // on as many cores as there are tile columns, so any additional loopfilter

+ // threads contend for those same cores. If the multithreading code changes

+ // in the future then the number of workers used by the loopfilter should be

+ // revisited.

for (i = 0; i < num_workers; ++i) {

- VP9Worker *const worker = &pbi->tile_workers[i];

- TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;

- LFWorkerData *const lf_data = &tile_data->lfdata;

+ VP9Worker *const worker = &workers[i];

+ LFWorkerData *const lf_data = &lf_sync->lfdata[i];

worker->hook = (VP9WorkerHook)loop_filter_row_worker;

+ worker->data1 = lf_sync;

+ worker->data2 = lf_data;

// Loopfilter data

- lf_data->frame_buffer = frame;

- lf_data->cm = cm;

- vp9_copy(lf_data->planes, pbi->mb.plane);

+ vp9_loop_filter_data_reset(lf_data, frame, cm, planes);

lf_data->start = i;

lf_data->stop = sb_rows;

- lf_data->y_only = y_only; // always do all planes in decoder

+ lf_data->y_only = y_only;

- lf_data->lf_sync = lf_sync;

- lf_data->num_lf_workers = num_workers;

// Start loopfiltering

if (i == num_workers - 1) {

winterface->execute(worker);

@@ -195,7 +189,7 @@

// Wait till all rows are finished

for (i = 0; i < num_workers; ++i) {

- winterface->sync(&pbi->tile_workers[i]);

+ winterface->sync(&workers[i]);

}

@@ -215,7 +209,7 @@

// Allocate memory for lf row synchronization

void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,

- int width) {

+ int width, int num_workers) {

lf_sync->rows = rows;

#if CONFIG_MULTITHREAD

{

@@ -239,6 +233,10 @@

}

#endif // CONFIG_MULTITHREAD

+ CHECK_MEM_ERROR(cm, lf_sync->lfdata,

+ vpx_malloc(num_workers * sizeof(*lf_sync->lfdata)));

+ lf_sync->num_workers = num_workers;

CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,

vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));

@@ -265,6 +263,7 @@

vpx_free(lf_sync->cond_);

}

#endif // CONFIG_MULTITHREAD

+ vpx_free(lf_sync->lfdata);

vpx_free(lf_sync->cur_sb_col);

// clear the structure as the source of this call may be a resize in which

// case this call will be followed by an _alloc() which may fail.

« no previous file with comments | « source/libvpx/vp9/decoder/vp9_dthread.h ('k') | source/libvpx/vp9/encoder/vp9_aq_complexity.h » ('j') | no next file with comments »