OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "vp9/encoder/vp9_encodeframe.h" |
| 12 #include "vp9/encoder/vp9_encoder.h" |
| 13 #include "vp9/encoder/vp9_ethread.h" |
| 14 |
| 15 static void accumulate_frame_counts(VP9_COMMON *cm, ThreadData *td) { |
| 16 int i, j, k, l, m; |
| 17 |
| 18 for (i = 0; i < BLOCK_SIZE_GROUPS; i++) |
| 19 for (j = 0; j < INTRA_MODES; j++) |
| 20 cm->counts.y_mode[i][j] += td->counts->y_mode[i][j]; |
| 21 |
| 22 for (i = 0; i < INTRA_MODES; i++) |
| 23 for (j = 0; j < INTRA_MODES; j++) |
| 24 cm->counts.uv_mode[i][j] += td->counts->uv_mode[i][j]; |
| 25 |
| 26 for (i = 0; i < PARTITION_CONTEXTS; i++) |
| 27 for (j = 0; j < PARTITION_TYPES; j++) |
| 28 cm->counts.partition[i][j] += td->counts->partition[i][j]; |
| 29 |
| 30 for (i = 0; i < TX_SIZES; i++) |
| 31 for (j = 0; j < PLANE_TYPES; j++) |
| 32 for (k = 0; k < REF_TYPES; k++) |
| 33 for (l = 0; l < COEF_BANDS; l++) |
| 34 for (m = 0; m < COEFF_CONTEXTS; m++) |
| 35 cm->counts.eob_branch[i][j][k][l][m] += |
| 36 td->counts->eob_branch[i][j][k][l][m]; |
| 37 // cm->counts.coef is only updated at frame level, so there is no need |
| 38 // to accumulate it here. |
| 39 // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) |
| 40 // cm->counts.coef[i][j][k][l][m][n] += |
| 41 // td->counts->coef[i][j][k][l][m][n]; |
| 42 |
| 43 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) |
| 44 for (j = 0; j < SWITCHABLE_FILTERS; j++) |
| 45 cm->counts.switchable_interp[i][j] += td->counts->switchable_interp[i][j]; |
| 46 |
| 47 for (i = 0; i < INTER_MODE_CONTEXTS; i++) |
| 48 for (j = 0; j < INTER_MODES; j++) |
| 49 cm->counts.inter_mode[i][j] += td->counts->inter_mode[i][j]; |
| 50 |
| 51 for (i = 0; i < INTRA_INTER_CONTEXTS; i++) |
| 52 for (j = 0; j < 2; j++) |
| 53 cm->counts.intra_inter[i][j] += td->counts->intra_inter[i][j]; |
| 54 |
| 55 for (i = 0; i < COMP_INTER_CONTEXTS; i++) |
| 56 for (j = 0; j < 2; j++) |
| 57 cm->counts.comp_inter[i][j] += td->counts->comp_inter[i][j]; |
| 58 |
| 59 for (i = 0; i < REF_CONTEXTS; i++) |
| 60 for (j = 0; j < 2; j++) |
| 61 for (k = 0; k < 2; k++) |
| 62 cm->counts.single_ref[i][j][k] += td->counts->single_ref[i][j][k]; |
| 63 |
| 64 for (i = 0; i < REF_CONTEXTS; i++) |
| 65 for (j = 0; j < 2; j++) |
| 66 cm->counts.comp_ref[i][j] += td->counts->comp_ref[i][j]; |
| 67 |
| 68 for (i = 0; i < TX_SIZE_CONTEXTS; i++) { |
| 69 for (j = 0; j < TX_SIZES; j++) |
| 70 cm->counts.tx.p32x32[i][j] += td->counts->tx.p32x32[i][j]; |
| 71 |
| 72 for (j = 0; j < TX_SIZES - 1; j++) |
| 73 cm->counts.tx.p16x16[i][j] += td->counts->tx.p16x16[i][j]; |
| 74 |
| 75 for (j = 0; j < TX_SIZES - 2; j++) |
| 76 cm->counts.tx.p8x8[i][j] += td->counts->tx.p8x8[i][j]; |
| 77 } |
| 78 |
| 79 for (i = 0; i < SKIP_CONTEXTS; i++) |
| 80 for (j = 0; j < 2; j++) |
| 81 cm->counts.skip[i][j] += td->counts->skip[i][j]; |
| 82 |
| 83 for (i = 0; i < MV_JOINTS; i++) |
| 84 cm->counts.mv.joints[i] += td->counts->mv.joints[i]; |
| 85 |
| 86 for (k = 0; k < 2; k++) { |
| 87 nmv_component_counts *comps = &cm->counts.mv.comps[k]; |
| 88 nmv_component_counts *comps_t = &td->counts->mv.comps[k]; |
| 89 |
| 90 for (i = 0; i < 2; i++) { |
| 91 comps->sign[i] += comps_t->sign[i]; |
| 92 comps->class0_hp[i] += comps_t->class0_hp[i]; |
| 93 comps->hp[i] += comps_t->hp[i]; |
| 94 } |
| 95 |
| 96 for (i = 0; i < MV_CLASSES; i++) |
| 97 comps->classes[i] += comps_t->classes[i]; |
| 98 |
| 99 for (i = 0; i < CLASS0_SIZE; i++) { |
| 100 comps->class0[i] += comps_t->class0[i]; |
| 101 for (j = 0; j < MV_FP_SIZE; j++) |
| 102 comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; |
| 103 } |
| 104 |
| 105 for (i = 0; i < MV_OFFSET_BITS; i++) |
| 106 for (j = 0; j < 2; j++) |
| 107 comps->bits[i][j] += comps_t->bits[i][j]; |
| 108 |
| 109 for (i = 0; i < MV_FP_SIZE; i++) |
| 110 comps->fp[i] += comps_t->fp[i]; |
| 111 } |
| 112 } |
| 113 |
| 114 static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { |
| 115 int i, j, k, l, m, n; |
| 116 |
| 117 for (i = 0; i < REFERENCE_MODES; i++) |
| 118 td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i]; |
| 119 |
| 120 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) |
| 121 td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i]; |
| 122 |
| 123 for (i = 0; i < TX_MODES; i++) |
| 124 td->rd_counts.tx_select_diff[i] += td_t->rd_counts.tx_select_diff[i]; |
| 125 |
| 126 for (i = 0; i < TX_SIZES; i++) |
| 127 for (j = 0; j < PLANE_TYPES; j++) |
| 128 for (k = 0; k < REF_TYPES; k++) |
| 129 for (l = 0; l < COEF_BANDS; l++) |
| 130 for (m = 0; m < COEFF_CONTEXTS; m++) |
| 131 for (n = 0; n < ENTROPY_TOKENS; n++) |
| 132 td->rd_counts.coef_counts[i][j][k][l][m][n] += |
| 133 td_t->rd_counts.coef_counts[i][j][k][l][m][n]; |
| 134 } |
| 135 |
| 136 static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) { |
| 137 VP9_COMP *const cpi = thread_data->cpi; |
| 138 const VP9_COMMON *const cm = &cpi->common; |
| 139 const int tile_cols = 1 << cm->log2_tile_cols; |
| 140 const int tile_rows = 1 << cm->log2_tile_rows; |
| 141 int t; |
| 142 |
| 143 (void) unused; |
| 144 |
| 145 for (t = thread_data->start; t < tile_rows * tile_cols; |
| 146 t += cpi->num_workers) { |
| 147 int tile_row = t / tile_cols; |
| 148 int tile_col = t % tile_cols; |
| 149 |
| 150 vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col); |
| 151 } |
| 152 |
| 153 return 0; |
| 154 } |
| 155 |
| 156 void vp9_encode_tiles_mt(VP9_COMP *cpi) { |
| 157 VP9_COMMON *const cm = &cpi->common; |
| 158 const int tile_cols = 1 << cm->log2_tile_cols; |
| 159 const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); |
| 160 const int num_workers = MIN(cpi->oxcf.max_threads, tile_cols); |
| 161 int i; |
| 162 |
| 163 vp9_init_tile_data(cpi); |
| 164 |
| 165 // Only run once to create threads and allocate thread data. |
| 166 if (cpi->num_workers == 0) { |
| 167 CHECK_MEM_ERROR(cm, cpi->workers, |
| 168 vpx_malloc(num_workers * sizeof(*cpi->workers))); |
| 169 |
| 170 for (i = 0; i < num_workers; i++) { |
| 171 VP9Worker *const worker = &cpi->workers[i]; |
| 172 EncWorkerData *thread_data; |
| 173 |
| 174 ++cpi->num_workers; |
| 175 |
| 176 winterface->init(worker); |
| 177 CHECK_MEM_ERROR(cm, worker->data1, |
| 178 (EncWorkerData*)vpx_calloc(1, sizeof(EncWorkerData))); |
| 179 thread_data = (EncWorkerData*)worker->data1; |
| 180 |
| 181 if (i < num_workers - 1) { |
| 182 thread_data->cpi = cpi; |
| 183 |
| 184 // Allocate thread data. |
| 185 CHECK_MEM_ERROR(cm, thread_data->td, |
| 186 vpx_calloc(1, sizeof(*thread_data->td))); |
| 187 // Set up pc_tree. |
| 188 thread_data->td->leaf_tree = NULL; |
| 189 thread_data->td->pc_tree = NULL; |
| 190 vp9_setup_pc_tree(cm, thread_data->td); |
| 191 |
| 192 // Allocate frame counters in thread data. |
| 193 CHECK_MEM_ERROR(cm, thread_data->td->counts, |
| 194 vpx_calloc(1, sizeof(*thread_data->td->counts))); |
| 195 |
| 196 // Create threads |
| 197 if (!winterface->reset(worker)) |
| 198 vpx_internal_error(&cm->error, VPX_CODEC_ERROR, |
| 199 "Tile encoder thread creation failed"); |
| 200 } else { |
| 201 // Main thread acts as a worker and uses the thread data in cpi. |
| 202 thread_data->cpi = cpi; |
| 203 thread_data->td = &cpi->td; |
| 204 } |
| 205 |
| 206 // data2 is unused. |
| 207 worker->data2 = NULL; |
| 208 |
| 209 winterface->sync(worker); |
| 210 worker->hook = (VP9WorkerHook)enc_worker_hook; |
| 211 } |
| 212 } |
| 213 |
| 214 for (i = 0; i < num_workers; i++) { |
| 215 VP9Worker *const worker = &cpi->workers[i]; |
| 216 EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; |
| 217 |
| 218 // Before encoding a frame, copy the thread data from cpi. |
| 219 thread_data->td->mb = cpi->td.mb; |
| 220 thread_data->td->rd_counts = cpi->td.rd_counts; |
| 221 vpx_memcpy(thread_data->td->counts, &cpi->common.counts, |
| 222 sizeof(cpi->common.counts)); |
| 223 |
| 224 // Handle use_nonrd_pick_mode case. |
| 225 if (cpi->sf.use_nonrd_pick_mode) { |
| 226 MACROBLOCK *const x = &thread_data->td->mb; |
| 227 MACROBLOCKD *const xd = &x->e_mbd; |
| 228 struct macroblock_plane *const p = x->plane; |
| 229 struct macroblockd_plane *const pd = xd->plane; |
| 230 PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; |
| 231 int j; |
| 232 |
| 233 for (j = 0; j < MAX_MB_PLANE; ++j) { |
| 234 p[j].coeff = ctx->coeff_pbuf[j][0]; |
| 235 p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; |
| 236 pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; |
| 237 p[j].eobs = ctx->eobs_pbuf[j][0]; |
| 238 } |
| 239 } |
| 240 } |
| 241 |
| 242 // Encode a frame |
| 243 for (i = 0; i < num_workers; i++) { |
| 244 VP9Worker *const worker = &cpi->workers[i]; |
| 245 EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; |
| 246 |
| 247 // Set the starting tile for each thread. |
| 248 thread_data->start = i; |
| 249 |
| 250 if (i == num_workers - 1) |
| 251 winterface->execute(worker); |
| 252 else |
| 253 winterface->launch(worker); |
| 254 } |
| 255 |
| 256 // Encoding ends. |
| 257 for (i = 0; i < num_workers; i++) { |
| 258 VP9Worker *const worker = &cpi->workers[i]; |
| 259 winterface->sync(worker); |
| 260 } |
| 261 |
| 262 for (i = 0; i < num_workers; i++) { |
| 263 VP9Worker *const worker = &cpi->workers[i]; |
| 264 EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; |
| 265 |
| 266 // Accumulate counters. |
| 267 if (i < num_workers - 1) { |
| 268 accumulate_frame_counts(&cpi->common, thread_data->td); |
| 269 accumulate_rd_opt(&cpi->td, thread_data->td); |
| 270 } |
| 271 } |
| 272 } |
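
Reviewer note (not part of the change): enc_worker_hook assigns tiles to workers in a strided pattern, so worker `start` handles tiles start, start + num_workers, start + 2 * num_workers, and so on, while vp9_encode_tiles_mt runs the last worker on the calling thread via execute(), launches the others on their own threads via launch(), and then sync()s them all before accumulating the per-thread counts. A minimal standalone sketch of the strided tile assignment is below; the names are hypothetical (list_tiles_for_worker is illustrative, not a libvpx function).

  #include <stdio.h>

  /* Illustrative only: print which (row, col) tiles a given worker would
   * visit under the strided assignment used by enc_worker_hook above. */
  static void list_tiles_for_worker(int start, int num_workers,
                                    int tile_rows, int tile_cols) {
    int t;
    for (t = start; t < tile_rows * tile_cols; t += num_workers) {
      const int tile_row = t / tile_cols;
      const int tile_col = t % tile_cols;
      printf("worker %d -> tile (row %d, col %d)\n", start, tile_row, tile_col);
    }
  }

  int main(void) {
    int i;
    /* Hypothetical example: a 2x4 tile grid shared by 3 workers. */
    for (i = 0; i < 3; i++)
      list_tiles_for_worker(i, 3, 2, 4);
    return 0;
  }

Striding by num_workers keeps the assignment deterministic and spreads adjacent tiles across threads, which tends to balance work when tile complexity varies gradually across the frame.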