| Index: source/libvpx/vp9/decoder/vp9_decodeframe.c
|
| ===================================================================
|
| --- source/libvpx/vp9/decoder/vp9_decodeframe.c (revision 271012)
|
| +++ source/libvpx/vp9/decoder/vp9_decodeframe.c (working copy)
|
| @@ -676,13 +676,13 @@
|
| }
|
|
|
| static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
|
| - vp9_reader *r) {
|
| - const int num_threads = pbi->oxcf.max_threads;
|
| + int do_loopfilter_inline, vp9_reader *r) {
|
| + const int num_threads = pbi->max_threads;
|
| VP9_COMMON *const cm = &pbi->common;
|
| int mi_row, mi_col;
|
| MACROBLOCKD *xd = &pbi->mb;
|
|
|
| - if (pbi->do_loopfilter_inline) {
|
| + if (do_loopfilter_inline) {
|
| LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
|
| lf_data->frame_buffer = get_frame_new_buffer(cm);
|
| lf_data->cm = cm;
|
| @@ -702,7 +702,7 @@
|
| decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64);
|
| }
|
|
|
| - if (pbi->do_loopfilter_inline) {
|
| + if (do_loopfilter_inline) {
|
| const int lf_start = mi_row - MI_BLOCK_SIZE;
|
| LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
|
|
|
| @@ -723,7 +723,7 @@
|
| }
|
| }
|
|
|
| - if (pbi->do_loopfilter_inline) {
|
| + if (do_loopfilter_inline) {
|
| LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
|
|
|
| vp9_worker_sync(&pbi->lf_worker);
|
| @@ -749,14 +749,20 @@
|
| cm->log2_tile_rows += vp9_rb_read_bit(rb);
|
| }
|
|
|
| +typedef struct TileBuffer {
|
| + const uint8_t *data;
|
| + size_t size;
|
| + int col; // only used with multi-threaded decoding
|
| +} TileBuffer;
|
| +
|
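| -// Reads the next tile returning its size and adjusting '*data' accordingly
|
| -// based on 'is_last'.
|
| +// Reads the next tile: stores its start and size in '*buf' and advances
|
| +// '*data' past it. The last tile ('is_last') extends up to 'data_end'.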
|
| -static size_t get_tile(const uint8_t *const data_end,
|
| - int is_last,
|
| - struct vpx_internal_error_info *error_info,
|
| - const uint8_t **data,
|
| - vpx_decrypt_cb decrypt_cb,
|
| - void *decrypt_state) {
|
| +static void get_tile_buffer(const uint8_t *const data_end,
|
| + int is_last,
|
| + struct vpx_internal_error_info *error_info,
|
| + const uint8_t **data,
|
| + vpx_decrypt_cb decrypt_cb, void *decrypt_state,
|
| + TileBuffer *buf) {
|
| size_t size;
|
|
|
| if (!is_last) {
|
| @@ -779,18 +785,34 @@
|
| } else {
|
| size = data_end - *data;
|
| }
|
| - return size;
|
| +
|
| + buf->data = *data;
|
| + buf->size = size;
|
| +
|
| + *data += size;
|
| }
|
|
|
| -typedef struct TileBuffer {
|
| - const uint8_t *data;
|
| - size_t size;
|
| - int col; // only used with multi-threaded decoding
|
| -} TileBuffer;
|
| +static void get_tile_buffers(VP9Decoder *pbi,
|
| + const uint8_t *data, const uint8_t *data_end,
|
| + int tile_cols, int tile_rows,
|
| + TileBuffer (*tile_buffers)[1 << 6]) {
|
| + int r, c;
|
|
|
| + for (r = 0; r < tile_rows; ++r) {
|
| + for (c = 0; c < tile_cols; ++c) {
|
| + const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1);
|
| + TileBuffer *const buf = &tile_buffers[r][c];
|
| + buf->col = c;
|
| + get_tile_buffer(data_end, is_last, &pbi->common.error, &data,
|
| + pbi->decrypt_cb, pbi->decrypt_state, buf);
|
| + }
|
| + }
|
| +}
|
| +
|
| static const uint8_t *decode_tiles(VP9Decoder *pbi,
|
| const uint8_t *data,
|
| - const uint8_t *data_end) {
|
| + const uint8_t *data_end,
|
| + int do_loopfilter_inline) {
|
| VP9_COMMON *const cm = &pbi->common;
|
| const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
|
| const int tile_cols = 1 << cm->log2_tile_cols;
|
| @@ -811,25 +833,12 @@
|
| vpx_memset(cm->above_seg_context, 0,
|
| sizeof(*cm->above_seg_context) * aligned_cols);
|
|
|
| - // Load tile data into tile_buffers
|
| - for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
|
| - for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
|
| - const int last_tile = tile_row == tile_rows - 1 &&
|
| - tile_col == tile_cols - 1;
|
| - const size_t size = get_tile(data_end, last_tile, &cm->error, &data,
|
| - pbi->decrypt_cb, pbi->decrypt_state);
|
| - TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
|
| - buf->data = data;
|
| - buf->size = size;
|
| - data += size;
|
| - }
|
| - }
|
| + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
|
|
|
| // Decode tiles using data from tile_buffers
|
| for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
|
| for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
|
| - const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1
|
| - : tile_col;
|
| + const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col;
|
| const int last_tile = tile_row == tile_rows - 1 &&
|
| col == tile_cols - 1;
|
| const TileBuffer *const buf = &tile_buffers[tile_row][col];
|
| @@ -838,7 +847,7 @@
|
| vp9_tile_init(&tile, cm, tile_row, col);
|
| setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r,
|
| pbi->decrypt_cb, pbi->decrypt_state);
|
| - decode_tile(pbi, &tile, &r);
|
| + decode_tile(pbi, &tile, do_loopfilter_inline, &r);
|
|
|
| if (last_tile)
|
| end = vp9_reader_find_end(&r);
|
| @@ -887,8 +896,8 @@
|
| const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
|
| const int tile_cols = 1 << cm->log2_tile_cols;
|
| const int tile_rows = 1 << cm->log2_tile_rows;
|
| - const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
|
| - TileBuffer tile_buffers[1 << 6];
|
| + const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
|
| + TileBuffer tile_buffers[1][1 << 6];
|
| int n;
|
| int final_worker = -1;
|
|
|
| @@ -899,7 +908,7 @@
|
| // TODO(jzern): See if we can remove the restriction of passing in max
|
| // threads to the decoder.
|
| if (pbi->num_tile_workers == 0) {
|
| - const int num_threads = pbi->oxcf.max_threads & ~1;
|
| + const int num_threads = pbi->max_threads & ~1;
|
| int i;
|
| // TODO(jzern): Allocate one less worker, as in the current code we only
|
| // use num_threads - 1 workers.
|
| @@ -933,19 +942,11 @@
|
| sizeof(*cm->above_seg_context) * aligned_mi_cols);
|
|
|
| // Load tile data into tile_buffers
|
| - for (n = 0; n < tile_cols; ++n) {
|
| - const size_t size =
|
| - get_tile(data_end, n == tile_cols - 1, &cm->error, &data,
|
| - pbi->decrypt_cb, pbi->decrypt_state);
|
| - TileBuffer *const buf = &tile_buffers[n];
|
| - buf->data = data;
|
| - buf->size = size;
|
| - buf->col = n;
|
| - data += size;
|
| - }
|
| + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
|
|
|
| // Sort the buffers based on size in descending order.
|
| - qsort(tile_buffers, tile_cols, sizeof(tile_buffers[0]), compare_tile_buffers);
|
| + qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]),
|
| + compare_tile_buffers);
|
|
|
| // Rearrange the tile buffers such that per-tile group the largest, and
|
| // presumably the most difficult, tile will be decoded in the main thread.
|
| @@ -954,11 +955,11 @@
|
| {
|
| int group_start = 0;
|
| while (group_start < tile_cols) {
|
| - const TileBuffer largest = tile_buffers[group_start];
|
| + const TileBuffer largest = tile_buffers[0][group_start];
|
| const int group_end = MIN(group_start + num_workers, tile_cols) - 1;
|
| - memmove(tile_buffers + group_start, tile_buffers + group_start + 1,
|
| - (group_end - group_start) * sizeof(tile_buffers[0]));
|
| - tile_buffers[group_end] = largest;
|
| + memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1,
|
| + (group_end - group_start) * sizeof(tile_buffers[0][0]));
|
| + tile_buffers[0][group_end] = largest;
|
| group_start = group_end + 1;
|
| }
|
| }
|
| @@ -970,7 +971,7 @@
|
| VP9Worker *const worker = &pbi->tile_workers[i];
|
| TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
|
| TileInfo *const tile = (TileInfo*)worker->data2;
|
| - TileBuffer *const buf = &tile_buffers[n];
|
| + TileBuffer *const buf = &tile_buffers[0][n];
|
|
|
| tile_data->cm = cm;
|
| tile_data->xd = pbi->mb;
|
| @@ -1278,6 +1279,7 @@
|
| const uint8_t *data,
|
| const uint8_t *data_end,
|
| uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) {
|
| + vp9_zero(*rb);
|
| rb->bit_offset = 0;
|
| rb->error_handler = error_handler;
|
| rb->error_handler_data = &pbi->common;
|
| @@ -1298,7 +1300,7 @@
|
| const uint8_t **p_data_end) {
|
| VP9_COMMON *const cm = &pbi->common;
|
| MACROBLOCKD *const xd = &pbi->mb;
|
| - struct vp9_read_bit_buffer rb = { 0 };
|
| + struct vp9_read_bit_buffer rb;
|
| uint8_t clear_data[MAX_VP9_HEADER_SIZE];
|
| const size_t first_partition_size = read_uncompressed_header(pbi,
|
| init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
|
| @@ -1306,6 +1308,8 @@
|
| const int tile_rows = 1 << cm->log2_tile_rows;
|
| const int tile_cols = 1 << cm->log2_tile_cols;
|
| YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
|
| + const int do_loopfilter_inline = tile_rows == 1 && tile_cols == 1 &&
|
| + cm->lf.filter_level;
|
| xd->cur_buf = new_fb;
|
|
|
| if (!first_partition_size) {
|
| @@ -1322,18 +1326,6 @@
|
| vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
|
| "Truncated packet or corrupt header length");
|
|
|
| - pbi->do_loopfilter_inline =
|
| - (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level;
|
| - if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
|
| - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
|
| - vpx_memalign(32, sizeof(LFWorkerData)));
|
| - pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
|
| - if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
|
| - vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
|
| - "Loop filter thread creation failed");
|
| - }
|
| - }
|
| -
|
| init_macroblockd(cm, &pbi->mb);
|
|
|
| if (cm->coding_use_prev_mi)
|
| @@ -1353,11 +1345,26 @@
|
|
|
| // TODO(jzern): remove frame_parallel_decoding_mode restriction for
|
| // single-frame tile decoding.
|
| - if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
|
| + if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
|
| cm->frame_parallel_decoding_mode) {
|
| *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
|
| + // If multiple threads are used to decode tiles, then we use those threads
|
| + // to do parallel loopfiltering.
|
| + vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
|
| } else {
|
| - *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
|
| + if (do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
|
| + CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
|
| + vpx_memalign(32, sizeof(LFWorkerData)));
|
| + pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
|
| + if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
|
| + vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
|
| + "Loop filter thread creation failed");
|
| + }
|
| + }
|
| + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end,
|
| + do_loopfilter_inline);
|
| + if (!do_loopfilter_inline)
|
| + vp9_loop_filter_frame(new_fb, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
|
| }
|
|
|
| new_fb->corrupted |= xd->corrupted;
|
| @@ -1370,16 +1377,17 @@
|
| "A stream must start with a complete key frame");
|
| }
|
|
|
| - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode &&
|
| - !new_fb->corrupted) {
|
| - vp9_adapt_coef_probs(cm);
|
| + if (!new_fb->corrupted) {
|
| + if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
|
| + vp9_adapt_coef_probs(cm);
|
|
|
| - if (!frame_is_intra_only(cm)) {
|
| - vp9_adapt_mode_probs(cm);
|
| - vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
|
| + if (!frame_is_intra_only(cm)) {
|
| + vp9_adapt_mode_probs(cm);
|
| + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
|
| + }
|
| + } else {
|
| + debug_check_frame_counts(cm);
|
| }
|
| - } else {
|
| - debug_check_frame_counts(cm);
|
| }
|
|
|
| if (cm->refresh_frame_context)
|
|
|