| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 | 11 |
| 12 #include "./vp9_rtcd.h" | 12 #include "./vp9_rtcd.h" |
| 13 #include "./vpx_config.h" | 13 #include "./vpx_config.h" |
| 14 | 14 |
| 15 #include "vpx_mem/vpx_mem.h" | 15 #include "vpx_mem/vpx_mem.h" |
| 16 | 16 |
| 17 #include "vp9/common/vp9_idct.h" | 17 #include "vp9/common/vp9_idct.h" |
| 18 #include "vp9/common/vp9_reconinter.h" | 18 #include "vp9/common/vp9_reconinter.h" |
| 19 #include "vp9/common/vp9_reconintra.h" | 19 #include "vp9/common/vp9_reconintra.h" |
| 20 #include "vp9/common/vp9_systemdependent.h" | 20 #include "vp9/common/vp9_systemdependent.h" |
| 21 | 21 |
| 22 #include "vp9/encoder/vp9_dct.h" | 22 #include "vp9/encoder/vp9_dct.h" |
| 23 #include "vp9/encoder/vp9_encodemb.h" | 23 #include "vp9/encoder/vp9_encodemb.h" |
| 24 #include "vp9/encoder/vp9_quantize.h" | 24 #include "vp9/encoder/vp9_quantize.h" |
| 25 #include "vp9/encoder/vp9_rdopt.h" | 25 #include "vp9/encoder/vp9_rdopt.h" |
| 26 #include "vp9/encoder/vp9_tokenize.h" | 26 #include "vp9/encoder/vp9_tokenize.h" |
| 27 | 27 |
| | 28 void vp9_setup_interp_filters(MACROBLOCKD *xd, |
| | 29 INTERPOLATION_TYPE mcomp_filter_type, |
| | 30 VP9_COMMON *cm) { |
| | 31 if (xd->mi_8x8 && xd->mi_8x8[0]) { |
| | 32 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; |
| | 33 |
| | 34 set_scale_factors(xd, mbmi->ref_frame[0] - LAST_FRAME, |
| | 35 mbmi->ref_frame[1] - LAST_FRAME, |
| | 36 cm->active_ref_scale); |
| | 37 } else { |
| | 38 set_scale_factors(xd, -1, -1, cm->active_ref_scale); |
| | 39 } |
| | 40 |
| | 41 xd->subpix.filter_x = xd->subpix.filter_y = |
| | 42 vp9_get_filter_kernel(mcomp_filter_type == SWITCHABLE ? |
| | 43 EIGHTTAP : mcomp_filter_type); |
| | 44 |
| | 45 assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0); |
| | 46 } |
| | 47 |
| 28 void vp9_subtract_block_c(int rows, int cols, | 48 void vp9_subtract_block_c(int rows, int cols, |
| 29 int16_t *diff_ptr, ptrdiff_t diff_stride, | 49 int16_t *diff_ptr, ptrdiff_t diff_stride, |
| 30 const uint8_t *src_ptr, ptrdiff_t src_stride, | 50 const uint8_t *src_ptr, ptrdiff_t src_stride, |
| 31 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { | 51 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { |
| 32 int r, c; | 52 int r, c; |
| 33 | 53 |
| 34 for (r = 0; r < rows; r++) { | 54 for (r = 0; r < rows; r++) { |
| 35 for (c = 0; c < cols; c++) | 55 for (c = 0; c < cols; c++) |
| 36 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; | 56 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; |
| 37 | 57 |
| 38 diff_ptr += diff_stride; | 58 diff_ptr += diff_stride; |
| 39 pred_ptr += pred_stride; | 59 pred_ptr += pred_stride; |
| 40 src_ptr += src_stride; | 60 src_ptr += src_stride; |
| 41 } | 61 } |
| 42 } | 62 } |
| 43 | 63 |
| 44 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { | 64 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { |
| 45 struct macroblock_plane *const p = &x->plane[plane]; | 65 struct macroblock_plane *const p = &x->plane[plane]; |
| 46 const MACROBLOCKD *const xd = &x->e_mbd; | 66 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; |
| 47 const struct macroblockd_plane *const pd = &xd->plane[plane]; | 67 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); |
| 48 const int bw = plane_block_width(bsize, pd); | 68 const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
| 49 const int bh = plane_block_height(bsize, pd); | 69 const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; |
| 50 | 70 |
| 51 vp9_subtract_block(bh, bw, p->src_diff, bw, | 71 vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, |
| 52 p->src.buf, p->src.stride, | |
| 53 pd->dst.buf, pd->dst.stride); | 72 pd->dst.buf, pd->dst.stride); |
| 54 } | 73 } |
| 55 | 74 |
| 56 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 75 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 57 subtract_plane(x, bsize, 0); | 76 subtract_plane(x, bsize, 0); |
| 58 } | 77 } |
| 59 | 78 |
| 60 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 79 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 61 int i; | 80 int i; |
| 62 | 81 |
| (...skipping 47 matching lines...) |
| 110 pt = get_coef_context(nb, token_cache, idx + 1); | 129 pt = get_coef_context(nb, token_cache, idx + 1); |
| 111 token_cache[scan[idx]] = bak; | 130 token_cache[scan[idx]] = bak; |
| 112 return pt; | 131 return pt; |
| 113 } | 132 } |
| 114 | 133 |
| 115 static void optimize_b(MACROBLOCK *mb, | 134 static void optimize_b(MACROBLOCK *mb, |
| 116 int plane, int block, BLOCK_SIZE plane_bsize, | 135 int plane, int block, BLOCK_SIZE plane_bsize, |
| 117 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, | 136 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, |
| 118 TX_SIZE tx_size) { | 137 TX_SIZE tx_size) { |
| 119 MACROBLOCKD *const xd = &mb->e_mbd; | 138 MACROBLOCKD *const xd = &mb->e_mbd; |
| | 139 struct macroblock_plane *p = &mb->plane[plane]; |
| 120 struct macroblockd_plane *pd = &xd->plane[plane]; | 140 struct macroblockd_plane *pd = &xd->plane[plane]; |
| 121 const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); | 141 const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); |
| 122 vp9_token_state tokens[1025][2]; | 142 vp9_token_state tokens[1025][2]; |
| 123 unsigned best_index[1025][2]; | 143 unsigned best_index[1025][2]; |
| 124 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); | 144 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); |
| 125 int16_t *qcoeff_ptr; | 145 int16_t *qcoeff_ptr; |
| 126 int16_t *dqcoeff_ptr; | 146 int16_t *dqcoeff_ptr; |
| 127 int eob = pd->eobs[block], final_eob, sz = 0; | 147 int eob = p->eobs[block], final_eob, sz = 0; |
| 128 const int i0 = 0; | 148 const int i0 = 0; |
| 129 int rc, x, next, i; | 149 int rc, x, next, i; |
| 130 int64_t rdmult, rddiv, rd_cost0, rd_cost1; | 150 int64_t rdmult, rddiv, rd_cost0, rd_cost1; |
| 131 int rate0, rate1, error0, error1, t0, t1; | 151 int rate0, rate1, error0, error1, t0, t1; |
| 132 int best, band, pt; | 152 int best, band, pt; |
| 133 PLANE_TYPE type = pd->plane_type; | 153 PLANE_TYPE type = pd->plane_type; |
| 134 int err_mult = plane_rd_mult[type]; | 154 int err_mult = plane_rd_mult[type]; |
| 135 const int default_eob = 16 << (tx_size << 1); | 155 const int default_eob = 16 << (tx_size << 1); |
| 136 const int16_t *scan, *nb; | 156 |
| 137 const int mul = 1 + (tx_size == TX_32X32); | 157 const int mul = 1 + (tx_size == TX_32X32); |
| 138 uint8_t token_cache[1024]; | 158 uint8_t token_cache[1024]; |
| 139 const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block); | |
| 140 const int16_t *dequant_ptr = pd->dequant; | 159 const int16_t *dequant_ptr = pd->dequant; |
| 141 const uint8_t *const band_translate = get_band_translate(tx_size); | 160 const uint8_t *const band_translate = get_band_translate(tx_size); |
| | 161 const scan_order *so = get_scan(xd, tx_size, type, block); |
| | 162 const int16_t *scan = so->scan; |
| | 163 const int16_t *nb = so->neighbors; |
| 142 | 164 |
| 143 assert((!type && !plane) || (type && plane)); | 165 assert((!type && !plane) || (type && plane)); |
| 144 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); | 166 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); |
| 145 qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); | 167 qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block); |
| 146 get_scan(xd, tx_size, type, ib, &scan, &nb); | |
| 147 assert(eob <= default_eob); | 168 assert(eob <= default_eob); |
| 148 | 169 |
| 149 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ | 170 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ |
| 150 rdmult = mb->rdmult * err_mult; | 171 rdmult = mb->rdmult * err_mult; |
| 151 if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) | 172 if (!is_inter_block(&mb->e_mbd.mi_8x8[0]->mbmi)) |
| 152 rdmult = (rdmult * 9) >> 4; | 173 rdmult = (rdmult * 9) >> 4; |
| 153 rddiv = mb->rddiv; | 174 rddiv = mb->rddiv; |
| 154 /* Initialize the sentinel node of the trellis. */ | 175 /* Initialize the sentinel node of the trellis. */ |
| 155 tokens[eob][0].rate = 0; | 176 tokens[eob][0].rate = 0; |
| 156 tokens[eob][0].error = 0; | 177 tokens[eob][0].error = 0; |
| 157 tokens[eob][0].next = default_eob; | 178 tokens[eob][0].next = default_eob; |
| 158 tokens[eob][0].token = DCT_EOB_TOKEN; | 179 tokens[eob][0].token = EOB_TOKEN; |
| 159 tokens[eob][0].qc = 0; | 180 tokens[eob][0].qc = 0; |
| 160 *(tokens[eob] + 1) = *(tokens[eob] + 0); | 181 *(tokens[eob] + 1) = *(tokens[eob] + 0); |
| 161 next = eob; | 182 next = eob; |
| 162 for (i = 0; i < eob; i++) | 183 for (i = 0; i < eob; i++) |
| 163 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ | 184 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ |
| 164 qcoeff_ptr[scan[i]]].token]; | 185 qcoeff_ptr[scan[i]]].token]; |
| 165 | 186 |
| 166 for (i = eob; i-- > i0;) { | 187 for (i = eob; i-- > i0;) { |
| 167 int base_bits, d2, dx; | 188 int base_bits, d2, dx; |
| 168 | 189 |
| 169 rc = scan[i]; | 190 rc = scan[i]; |
| 170 x = qcoeff_ptr[rc]; | 191 x = qcoeff_ptr[rc]; |
| 171 /* Only add a trellis state for non-zero coefficients. */ | 192 /* Only add a trellis state for non-zero coefficients. */ |
| 172 if (x) { | 193 if (x) { |
| 173 int shortcut = 0; | 194 int shortcut = 0; |
| 174 error0 = tokens[next][0].error; | 195 error0 = tokens[next][0].error; |
| 175 error1 = tokens[next][1].error; | 196 error1 = tokens[next][1].error; |
| 176 /* Evaluate the first possibility for this state. */ | 197 /* Evaluate the first possibility for this state. */ |
| 177 rate0 = tokens[next][0].rate; | 198 rate0 = tokens[next][0].rate; |
| 178 rate1 = tokens[next][1].rate; | 199 rate1 = tokens[next][1].rate; |
| 179 t0 = (vp9_dct_value_tokens_ptr + x)->token; | 200 t0 = (vp9_dct_value_tokens_ptr + x)->token; |
| 180 /* Consider both possible successor states. */ | 201 /* Consider both possible successor states. */ |
| 181 if (next < default_eob) { | 202 if (next < default_eob) { |
| 182 band = get_coef_band(band_translate, i + 1); | 203 band = band_translate[i + 1]; |
| 183 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); | 204 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); |
| 184 rate0 += | 205 rate0 += |
| 185 mb->token_costs[tx_size][type][ref][band][0][pt] | 206 mb->token_costs[tx_size][type][ref][band][0][pt] |
| 186 [tokens[next][0].token]; | 207 [tokens[next][0].token]; |
| 187 rate1 += | 208 rate1 += |
| 188 mb->token_costs[tx_size][type][ref][band][0][pt] | 209 mb->token_costs[tx_size][type][ref][band][0][pt] |
| 189 [tokens[next][1].token]; | 210 [tokens[next][1].token]; |
| 190 } | 211 } |
| 191 UPDATE_RD_COST(); | 212 UPDATE_RD_COST(); |
| 192 /* And pick the best. */ | 213 /* And pick the best. */ |
| (...skipping 22 matching lines...) |
| 215 if (shortcut) { | 236 if (shortcut) { |
| 216 sz = -(x < 0); | 237 sz = -(x < 0); |
| 217 x -= 2 * sz + 1; | 238 x -= 2 * sz + 1; |
| 218 } | 239 } |
| 219 | 240 |
| 220 /* Consider both possible successor states. */ | 241 /* Consider both possible successor states. */ |
| 221 if (!x) { | 242 if (!x) { |
| 222 /* If we reduced this coefficient to zero, check to see if | 243 /* If we reduced this coefficient to zero, check to see if |
| 223 * we need to move the EOB back here. | 244 * we need to move the EOB back here. |
| 224 */ | 245 */ |
| 225 t0 = tokens[next][0].token == DCT_EOB_TOKEN ? | 246 t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; |
| 226 DCT_EOB_TOKEN : ZERO_TOKEN; | 247 t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; |
| 227 t1 = tokens[next][1].token == DCT_EOB_TOKEN ? | |
| 228 DCT_EOB_TOKEN : ZERO_TOKEN; | |
| 229 } else { | 248 } else { |
| 230 t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; | 249 t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; |
| 231 } | 250 } |
| 232 if (next < default_eob) { | 251 if (next < default_eob) { |
| 233 band = get_coef_band(band_translate, i + 1); | 252 band = band_translate[i + 1]; |
| 234 if (t0 != DCT_EOB_TOKEN) { | 253 if (t0 != EOB_TOKEN) { |
| 235 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); | 254 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); |
| 236 rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] | 255 rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] |
| 237 [tokens[next][0].token]; | 256 [tokens[next][0].token]; |
| 238 } | 257 } |
| 239 if (t1 != DCT_EOB_TOKEN) { | 258 if (t1 != EOB_TOKEN) { |
| 240 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); | 259 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); |
| 241 rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] | 260 rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] |
| 242 [tokens[next][1].token]; | 261 [tokens[next][1].token]; |
| 243 } | 262 } |
| 244 } | 263 } |
| 245 | 264 |
| 246 UPDATE_RD_COST(); | 265 UPDATE_RD_COST(); |
| 247 /* And pick the best. */ | 266 /* And pick the best. */ |
| 248 best = rd_cost1 < rd_cost0; | 267 best = rd_cost1 < rd_cost0; |
| 249 base_bits = *(vp9_dct_value_cost_ptr + x); | 268 base_bits = *(vp9_dct_value_cost_ptr + x); |
| 250 | 269 |
| 251 if (shortcut) { | 270 if (shortcut) { |
| 252 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; | 271 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; |
| 253 d2 = dx * dx; | 272 d2 = dx * dx; |
| 254 } | 273 } |
| 255 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); | 274 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); |
| 256 tokens[i][1].error = d2 + (best ? error1 : error0); | 275 tokens[i][1].error = d2 + (best ? error1 : error0); |
| 257 tokens[i][1].next = next; | 276 tokens[i][1].next = next; |
| 258 tokens[i][1].token = best ? t1 : t0; | 277 tokens[i][1].token = best ? t1 : t0; |
| 259 tokens[i][1].qc = x; | 278 tokens[i][1].qc = x; |
| 260 best_index[i][1] = best; | 279 best_index[i][1] = best; |
| 261 /* Finally, make this the new head of the trellis. */ | 280 /* Finally, make this the new head of the trellis. */ |
| 262 next = i; | 281 next = i; |
| 263 } else { | 282 } else { |
| 264 /* There's no choice to make for a zero coefficient, so we don't | 283 /* There's no choice to make for a zero coefficient, so we don't |
| 265 * add a new trellis node, but we do need to update the costs. | 284 * add a new trellis node, but we do need to update the costs. |
| 266 */ | 285 */ |
| 267 band = get_coef_band(band_translate, i + 1); | 286 band = band_translate[i + 1]; |
| 268 t0 = tokens[next][0].token; | 287 t0 = tokens[next][0].token; |
| 269 t1 = tokens[next][1].token; | 288 t1 = tokens[next][1].token; |
| 270 /* Update the cost of each path if we're past the EOB token. */ | 289 /* Update the cost of each path if we're past the EOB token. */ |
| 271 if (t0 != DCT_EOB_TOKEN) { | 290 if (t0 != EOB_TOKEN) { |
| 272 tokens[next][0].rate += | 291 tokens[next][0].rate += |
| 273 mb->token_costs[tx_size][type][ref][band][1][0][t0]; | 292 mb->token_costs[tx_size][type][ref][band][1][0][t0]; |
| 274 tokens[next][0].token = ZERO_TOKEN; | 293 tokens[next][0].token = ZERO_TOKEN; |
| 275 } | 294 } |
| 276 if (t1 != DCT_EOB_TOKEN) { | 295 if (t1 != EOB_TOKEN) { |
| 277 tokens[next][1].rate += | 296 tokens[next][1].rate += |
| 278 mb->token_costs[tx_size][type][ref][band][1][0][t1]; | 297 mb->token_costs[tx_size][type][ref][band][1][0][t1]; |
| 279 tokens[next][1].token = ZERO_TOKEN; | 298 tokens[next][1].token = ZERO_TOKEN; |
| 280 } | 299 } |
| 281 best_index[i][0] = best_index[i][1] = 0; | 300 best_index[i][0] = best_index[i][1] = 0; |
| 282 /* Don't update next, because we didn't add a new node. */ | 301 /* Don't update next, because we didn't add a new node. */ |
| 283 } | 302 } |
| 284 } | 303 } |
| 285 | 304 |
| 286 /* Now pick the best path through the whole trellis. */ | 305 /* Now pick the best path through the whole trellis. */ |
| 287 band = get_coef_band(band_translate, i + 1); | 306 band = band_translate[i + 1]; |
| 288 pt = combine_entropy_contexts(*a, *l); | 307 pt = combine_entropy_contexts(*a, *l); |
| 289 rate0 = tokens[next][0].rate; | 308 rate0 = tokens[next][0].rate; |
| 290 rate1 = tokens[next][1].rate; | 309 rate1 = tokens[next][1].rate; |
| 291 error0 = tokens[next][0].error; | 310 error0 = tokens[next][0].error; |
| 292 error1 = tokens[next][1].error; | 311 error1 = tokens[next][1].error; |
| 293 t0 = tokens[next][0].token; | 312 t0 = tokens[next][0].token; |
| 294 t1 = tokens[next][1].token; | 313 t1 = tokens[next][1].token; |
| 295 rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0]; | 314 rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0]; |
| 296 rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1]; | 315 rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1]; |
| 297 UPDATE_RD_COST(); | 316 UPDATE_RD_COST(); |
| 298 best = rd_cost1 < rd_cost0; | 317 best = rd_cost1 < rd_cost0; |
| 299 final_eob = i0 - 1; | 318 final_eob = i0 - 1; |
| 300 vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2))); | 319 vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2))); |
| 301 vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2))); | 320 vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2))); |
| 302 for (i = next; i < eob; i = next) { | 321 for (i = next; i < eob; i = next) { |
| 303 x = tokens[i][best].qc; | 322 x = tokens[i][best].qc; |
| 304 if (x) { | 323 if (x) { |
| 305 final_eob = i; | 324 final_eob = i; |
| 306 } | 325 } |
| 307 rc = scan[i]; | 326 rc = scan[i]; |
| 308 qcoeff_ptr[rc] = x; | 327 qcoeff_ptr[rc] = x; |
| 309 dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; | 328 dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; |
| 310 | 329 |
| 311 next = tokens[i][best].next; | 330 next = tokens[i][best].next; |
| 312 best = best_index[i][best]; | 331 best = best_index[i][best]; |
| 313 } | 332 } |
| 314 final_eob++; | 333 final_eob++; |
| 315 | 334 |
| 316 xd->plane[plane].eobs[block] = final_eob; | 335 mb->plane[plane].eobs[block] = final_eob; |
| 317 *a = *l = (final_eob > 0); | 336 *a = *l = (final_eob > 0); |
| 318 } | 337 } |
| 319 | 338 |
| 320 void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, | 339 void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, |
| 321 TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { | 340 TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { |
| 322 int x, y; | 341 int x, y; |
| 323 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); | 342 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); |
| 324 optimize_b(mb, plane, block, plane_bsize, | 343 optimize_b(mb, plane, block, plane_bsize, |
| 325 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); | 344 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); |
| 326 } | 345 } |
| (...skipping 14 matching lines...) |
| 341 } | 360 } |
| 342 | 361 |
| 343 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, | 362 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, |
| 344 TX_SIZE tx_size, void *arg) { | 363 TX_SIZE tx_size, void *arg) { |
| 345 struct encode_b_args* const args = arg; | 364 struct encode_b_args* const args = arg; |
| 346 MACROBLOCK* const x = args->x; | 365 MACROBLOCK* const x = args->x; |
| 347 MACROBLOCKD* const xd = &x->e_mbd; | 366 MACROBLOCKD* const xd = &x->e_mbd; |
| 348 struct macroblock_plane *const p = &x->plane[plane]; | 367 struct macroblock_plane *const p = &x->plane[plane]; |
| 349 struct macroblockd_plane *const pd = &xd->plane[plane]; | 368 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 350 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 369 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
| 351 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 370 int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
| 352 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 371 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 353 const int16_t *scan, *iscan; | 372 const scan_order *scan_order; |
| 354 uint16_t *eob = &pd->eobs[block]; | 373 uint16_t *eob = &p->eobs[block]; |
| 355 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 374 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
| 356 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 375 int i, j; |
| 357 int xoff, yoff; | |
| 358 int16_t *src_diff; | 376 int16_t *src_diff; |
| | 377 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| | 378 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
| 359 | 379 |
| 360 switch (tx_size) { | 380 switch (tx_size) { |
| 361 case TX_32X32: | 381 case TX_32X32: |
| 362 scan = vp9_default_scan_32x32; | 382 scan_order = &vp9_default_scan_orders[TX_32X32]; |
| 363 iscan = vp9_default_iscan_32x32; | |
| 364 block >>= 6; | |
| 365 xoff = 32 * (block & twmask); | |
| 366 yoff = 32 * (block >> twl); | |
| 367 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
| 368 if (x->use_lp32x32fdct) | 383 if (x->use_lp32x32fdct) |
| 369 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); | 384 vp9_fdct32x32_rd(src_diff, coeff, diff_stride); |
| 370 else | 385 else |
| 371 vp9_fdct32x32(src_diff, coeff, bw * 4); | 386 vp9_fdct32x32(src_diff, coeff, diff_stride); |
| 372 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 387 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
| 373 p->quant, p->quant_shift, qcoeff, dqcoeff, | 388 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 374 pd->dequant, p->zbin_extra, eob, scan, iscan); | 389 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
| | 390 scan_order->iscan); |
| 375 break; | 391 break; |
| 376 case TX_16X16: | 392 case TX_16X16: |
| 377 scan = vp9_default_scan_16x16; | 393 scan_order = &vp9_default_scan_orders[TX_16X16]; |
| 378 iscan = vp9_default_iscan_16x16; | 394 vp9_fdct16x16(src_diff, coeff, diff_stride); |
| 379 block >>= 4; | |
| 380 xoff = 16 * (block & twmask); | |
| 381 yoff = 16 * (block >> twl); | |
| 382 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
| 383 vp9_fdct16x16(src_diff, coeff, bw * 4); | |
| 384 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 395 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
| 385 p->quant, p->quant_shift, qcoeff, dqcoeff, | 396 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 386 pd->dequant, p->zbin_extra, eob, scan, iscan); | 397 pd->dequant, p->zbin_extra, eob, |
| | 398 scan_order->scan, scan_order->iscan); |
| 387 break; | 399 break; |
| 388 case TX_8X8: | 400 case TX_8X8: |
| 389 scan = vp9_default_scan_8x8; | 401 scan_order = &vp9_default_scan_orders[TX_8X8]; |
| 390 iscan = vp9_default_iscan_8x8; | 402 vp9_fdct8x8(src_diff, coeff, diff_stride); |
| 391 block >>= 2; | |
| 392 xoff = 8 * (block & twmask); | |
| 393 yoff = 8 * (block >> twl); | |
| 394 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
| 395 vp9_fdct8x8(src_diff, coeff, bw * 4); | |
| 396 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, | 403 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, |
| 397 p->quant, p->quant_shift, qcoeff, dqcoeff, | 404 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 398 pd->dequant, p->zbin_extra, eob, scan, iscan); | 405 pd->dequant, p->zbin_extra, eob, |
| | 406 scan_order->scan, scan_order->iscan); |
| 399 break; | 407 break; |
| 400 case TX_4X4: | 408 case TX_4X4: |
| 401 scan = vp9_default_scan_4x4; | 409 scan_order = &vp9_default_scan_orders[TX_4X4]; |
| 402 iscan = vp9_default_iscan_4x4; | 410 x->fwd_txm4x4(src_diff, coeff, diff_stride); |
| 403 xoff = 4 * (block & twmask); | |
| 404 yoff = 4 * (block >> twl); | |
| 405 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
| 406 x->fwd_txm4x4(src_diff, coeff, bw * 4); | |
| 407 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, | 411 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, |
| 408 p->quant, p->quant_shift, qcoeff, dqcoeff, | 412 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 409 pd->dequant, p->zbin_extra, eob, scan, iscan); | 413 pd->dequant, p->zbin_extra, eob, |
| | 414 scan_order->scan, scan_order->iscan); |
| 410 break; | 415 break; |
| 411 default: | 416 default: |
| 412 assert(0); | 417 assert(0); |
| 413 } | 418 } |
| 414 } | 419 } |
| 415 | 420 |
| 416 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, | 421 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, |
| 417 TX_SIZE tx_size, void *arg) { | 422 TX_SIZE tx_size, void *arg) { |
| 418 struct encode_b_args *const args = arg; | 423 struct encode_b_args *const args = arg; |
| 419 MACROBLOCK *const x = args->x; | 424 MACROBLOCK *const x = args->x; |
| 420 MACROBLOCKD *const xd = &x->e_mbd; | 425 MACROBLOCKD *const xd = &x->e_mbd; |
| 421 struct optimize_ctx *const ctx = args->ctx; | 426 struct optimize_ctx *const ctx = args->ctx; |
| | 427 struct macroblock_plane *const p = &x->plane[plane]; |
| 422 struct macroblockd_plane *const pd = &xd->plane[plane]; | 428 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 423 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, | |
| 424 block); | |
| 425 | |
| 426 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 429 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 427 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, | 430 int i, j; |
| 428 pd->dst.buf, pd->dst.stride); | 431 uint8_t *dst; |
| | 432 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| | 433 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; |
| 429 | 434 |
| 430 // TODO(jingning): per transformed block zero forcing only enabled for | 435 // TODO(jingning): per transformed block zero forcing only enabled for |
| 431 // luma component. will integrate chroma components as well. | 436 // luma component. will integrate chroma components as well. |
| 432 if (x->zcoeff_blk[tx_size][block] && plane == 0) { | 437 if (x->zcoeff_blk[tx_size][block] && plane == 0) { |
| 433 int x, y; | 438 p->eobs[block] = 0; |
| 434 pd->eobs[block] = 0; | 439 ctx->ta[plane][i] = 0; |
| 435 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); | 440 ctx->tl[plane][j] = 0; |
| 436 ctx->ta[plane][x] = 0; | |
| 437 ctx->tl[plane][y] = 0; | |
| 438 return; | 441 return; |
| 439 } | 442 } |
| 440 | 443 |
| 441 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); | 444 if (!x->skip_recode) |
| | 445 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
| 442 | 446 |
| 443 if (x->optimize) | 447 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { |
| 444 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); | 448 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); |
| | 449 } else { |
| | 450 ctx->ta[plane][i] = p->eobs[block] > 0; |
| | 451 ctx->tl[plane][j] = p->eobs[block] > 0; |
| | 452 } |
| 445 | 453 |
| 446 if (x->skip_encode || pd->eobs[block] == 0) | 454 if (x->skip_encode || p->eobs[block] == 0) |
| 447 return; | 455 return; |
| 448 | 456 |
| 449 switch (tx_size) { | 457 switch (tx_size) { |
| 450 case TX_32X32: | 458 case TX_32X32: |
| 451 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 459 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
| 452 break; | 460 break; |
| 453 case TX_16X16: | 461 case TX_16X16: |
| 454 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 462 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
| 455 break; | 463 break; |
| 456 case TX_8X8: | 464 case TX_8X8: |
| 457 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 465 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
| 458 break; | 466 break; |
| 459 case TX_4X4: | 467 case TX_4X4: |
| 460 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 468 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
| 461 // which is significant (not just an optimization) for the lossless | 469 // which is significant (not just an optimization) for the lossless |
| 462 // case. | 470 // case. |
| 463 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 471 xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
| 464 break; | 472 break; |
| 465 default: | 473 default: |
| 466 assert(!"Invalid transform size"); | 474 assert(0 && "Invalid transform size"); |
| 467 } | 475 } |
| 468 } | 476 } |
| 469 | 477 |
| 470 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, | 478 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, |
| 471 TX_SIZE tx_size, void *arg) { | 479 TX_SIZE tx_size, void *arg) { |
| 472 struct encode_b_args *const args = arg; | 480 struct encode_b_args *const args = arg; |
| 473 MACROBLOCK *const x = args->x; | 481 MACROBLOCK *const x = args->x; |
| 474 MACROBLOCKD *const xd = &x->e_mbd; | 482 MACROBLOCKD *const xd = &x->e_mbd; |
| | 483 struct macroblock_plane *const p = &x->plane[plane]; |
| 475 struct macroblockd_plane *const pd = &xd->plane[plane]; | 484 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 476 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, | |
| 477 block); | |
| 478 | |
| 479 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 485 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 480 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, | 486 int i, j; |
| 481 pd->dst.buf, pd->dst.stride); | 487 uint8_t *dst; |
| | 488 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| | 489 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; |
| 482 | 490 |
| 483 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); | 491 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
| 484 | 492 |
| 485 if (pd->eobs[block] == 0) | 493 if (p->eobs[block] == 0) |
| 486 return; | 494 return; |
| 487 | 495 |
| 488 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 496 xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
| 489 } | 497 } |
| 490 | 498 |
| 491 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 499 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 492 MACROBLOCKD *const xd = &x->e_mbd; | 500 MACROBLOCKD *const xd = &x->e_mbd; |
| 493 struct optimize_ctx ctx; | 501 struct optimize_ctx ctx; |
| 494 struct encode_b_args arg = {x, &ctx}; | 502 struct encode_b_args arg = {x, &ctx}; |
| 495 | 503 |
| 496 vp9_subtract_sby(x, bsize); | 504 vp9_subtract_sby(x, bsize); |
| 497 if (x->optimize) | 505 if (x->optimize) |
| 498 optimize_init_b(0, bsize, &arg); | 506 optimize_init_b(0, bsize, &arg); |
| 499 | 507 |
| 500 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg); | 508 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg); |
| 501 } | 509 } |
| 502 | 510 |
| 503 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { | 511 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 504 MACROBLOCKD *const xd = &x->e_mbd; | 512 MACROBLOCKD *const xd = &x->e_mbd; |
| 505 struct optimize_ctx ctx; | 513 struct optimize_ctx ctx; |
| 506 struct encode_b_args arg = {x, &ctx}; | 514 struct encode_b_args arg = {x, &ctx}; |
| 507 | 515 |
| 508 vp9_subtract_sb(x, bsize); | 516 if (!x->skip_recode) |
| | 517 vp9_subtract_sb(x, bsize); |
| 509 | 518 |
| 510 if (x->optimize) { | 519 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { |
| 511 int i; | 520 int i; |
| 512 for (i = 0; i < MAX_MB_PLANE; ++i) | 521 for (i = 0; i < MAX_MB_PLANE; ++i) |
| 513 optimize_init_b(i, bsize, &arg); | 522 optimize_init_b(i, bsize, &arg); |
| 514 } | 523 } |
| 515 | 524 |
| 516 foreach_transformed_block(xd, bsize, encode_block, &arg); | 525 foreach_transformed_block(xd, bsize, encode_block, &arg); |
| 517 } | 526 } |
| 518 | 527 |
| 519 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, | 528 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, |
| 520 TX_SIZE tx_size, void *arg) { | 529 TX_SIZE tx_size, void *arg) { |
| 521 struct encode_b_args* const args = arg; | 530 struct encode_b_args* const args = arg; |
| 522 MACROBLOCK *const x = args->x; | 531 MACROBLOCK *const x = args->x; |
| 523 MACROBLOCKD *const xd = &x->e_mbd; | 532 MACROBLOCKD *const xd = &x->e_mbd; |
| 524 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; | 533 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; |
| 525 struct macroblock_plane *const p = &x->plane[plane]; | 534 struct macroblock_plane *const p = &x->plane[plane]; |
| 526 struct macroblockd_plane *const pd = &xd->plane[plane]; | 535 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 527 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 536 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
| 528 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 537 int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
| 529 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 538 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 530 const int16_t *scan, *iscan; | 539 const scan_order *scan_order; |
| 531 TX_TYPE tx_type; | 540 TX_TYPE tx_type; |
| 532 MB_PREDICTION_MODE mode; | 541 MB_PREDICTION_MODE mode; |
| 533 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 542 const int bwl = b_width_log2(plane_bsize); |
| 534 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 543 const int diff_stride = 4 * (1 << bwl); |
| 535 int xoff, yoff; | |
| 536 uint8_t *src, *dst; | 544 uint8_t *src, *dst; |
| 537 int16_t *src_diff; | 545 int16_t *src_diff; |
| 538 uint16_t *eob = &pd->eobs[block]; | 546 uint16_t *eob = &p->eobs[block]; |
| | 547 int i, j; |
| | 548 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| | 549 dst = &pd->dst.buf[4 * (j * pd->dst.stride + i)]; |
| | 550 src = &p->src.buf[4 * (j * p->src.stride + i)]; |
| | 551 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
| 539 | 552 |
| 540 if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) | 553 if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) |
| 541 extend_for_intra(xd, plane_bsize, plane, block, tx_size); | 554 extend_for_intra(xd, plane_bsize, plane, i, j); |
| 542 | 555 |
| 543 // if (x->optimize) | 556 // if (x->optimize) |
| 544 // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); | 557 // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); |
| 545 | 558 |
| 546 switch (tx_size) { | 559 switch (tx_size) { |
| 547 case TX_32X32: | 560 case TX_32X32: |
| 548 scan = vp9_default_scan_32x32; | 561 scan_order = &vp9_default_scan_orders[TX_32X32]; |
| 549 iscan = vp9_default_iscan_32x32; | |
| 550 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 562 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
| 551 block >>= 6; | 563 vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode, |
| 552 xoff = 32 * (block & twmask); | 564 x->skip_encode ? src : dst, |
| 553 yoff = 32 * (block >> twl); | 565 x->skip_encode ? p->src.stride : pd->dst.stride, |
| 554 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 566 dst, pd->dst.stride); |
| 555 src = p->src.buf + yoff * p->src.stride + xoff; | 567 if (!x->skip_recode) { |
| 556 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 568 vp9_subtract_block(32, 32, src_diff, diff_stride, |
| 557 vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, | 569 src, p->src.stride, dst, pd->dst.stride); |
| 558 dst, pd->dst.stride, dst, pd->dst.stride); | 570 if (x->use_lp32x32fdct) |
| 559 vp9_subtract_block(32, 32, src_diff, bw * 4, | 571 vp9_fdct32x32_rd(src_diff, coeff, diff_stride); |
| 560 src, p->src.stride, dst, pd->dst.stride); | 572 else |
| 561 if (x->use_lp32x32fdct) | 573 vp9_fdct32x32(src_diff, coeff, diff_stride); |
| 562 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); | 574 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
| 563 else | 575 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 564 vp9_fdct32x32(src_diff, coeff, bw * 4); | 576 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
| 565 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 577 scan_order->iscan); |
| 566 p->quant, p->quant_shift, qcoeff, dqcoeff, | 578 } |
| 567 pd->dequant, p->zbin_extra, eob, scan, iscan); | |
| 568 if (!x->skip_encode && *eob) | 579 if (!x->skip_encode && *eob) |
| 569 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); | 580 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); |
| 570 break; | 581 break; |
| 571 case TX_16X16: | 582 case TX_16X16: |
| 572 tx_type = get_tx_type_16x16(pd->plane_type, xd); | 583 tx_type = get_tx_type_16x16(pd->plane_type, xd); |
| 573 scan = get_scan_16x16(tx_type); | 584 scan_order = &vp9_scan_orders[TX_16X16][tx_type]; |
| 574 iscan = get_iscan_16x16(tx_type); | |
| 575 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 585 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
| 576 block >>= 4; | 586 vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, |
| 577 xoff = 16 * (block & twmask); | 587 x->skip_encode ? src : dst, |
| 578 yoff = 16 * (block >> twl); | 588 x->skip_encode ? p->src.stride : pd->dst.stride, |
| 579 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 589 dst, pd->dst.stride); |
| 580 src = p->src.buf + yoff * p->src.stride + xoff; | 590 if (!x->skip_recode) { |
| 581 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 591 vp9_subtract_block(16, 16, src_diff, diff_stride, |
| 582 vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, | 592 src, p->src.stride, dst, pd->dst.stride); |
| 583 dst, pd->dst.stride, dst, pd->dst.stride); | 593 vp9_fht16x16(tx_type, src_diff, coeff, diff_stride); |
| 584 vp9_subtract_block(16, 16, src_diff, bw * 4, | 594 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
| 585 src, p->src.stride, dst, pd->dst.stride); | 595 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 586 vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); | 596 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
| 587 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 597 scan_order->iscan); |
| 588 p->quant, p->quant_shift, qcoeff, dqcoeff, | 598 } |
| 589 pd->dequant, p->zbin_extra, eob, scan, iscan); | |
| 590 if (!x->skip_encode && *eob) | 599 if (!x->skip_encode && *eob) |
| 591 vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); | 600 vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
| 592 break; | 601 break; |
| 593 case TX_8X8: | 602 case TX_8X8: |
| 594 tx_type = get_tx_type_8x8(pd->plane_type, xd); | 603 tx_type = get_tx_type_8x8(pd->plane_type, xd); |
| 595 scan = get_scan_8x8(tx_type); | 604 scan_order = &vp9_scan_orders[TX_8X8][tx_type]; |
| 596 iscan = get_iscan_8x8(tx_type); | |
| 597 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 605 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
| 598 block >>= 2; | 606 vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, |
| 599 xoff = 8 * (block & twmask); | 607 x->skip_encode ? src : dst, |
| 600 yoff = 8 * (block >> twl); | 608 x->skip_encode ? p->src.stride : pd->dst.stride, |
| 601 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 609 dst, pd->dst.stride); |
| 602 src = p->src.buf + yoff * p->src.stride + xoff; | 610 if (!x->skip_recode) { |
| 603 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 611 vp9_subtract_block(8, 8, src_diff, diff_stride, |
| 604 vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, | 612 src, p->src.stride, dst, pd->dst.stride); |
| 605 dst, pd->dst.stride, dst, pd->dst.stride); | 613 vp9_fht8x8(tx_type, src_diff, coeff, diff_stride); |
| 606 vp9_subtract_block(8, 8, src_diff, bw * 4, | 614 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, |
| 607 src, p->src.stride, dst, pd->dst.stride); | 615 p->quant_shift, qcoeff, dqcoeff, |
| 608 vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); | 616 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
| 609 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, | 617 scan_order->iscan); |
| 610 p->quant_shift, qcoeff, dqcoeff, | 618 } |
| 611 pd->dequant, p->zbin_extra, eob, scan, iscan); | |
| 612 if (!x->skip_encode && *eob) | 619 if (!x->skip_encode && *eob) |
| 613 vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); | 620 vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
| 614 break; | 621 break; |
| 615 case TX_4X4: | 622 case TX_4X4: |
| 616 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); | 623 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); |
| 617 scan = get_scan_4x4(tx_type); | 624 scan_order = &vp9_scan_orders[TX_4X4][tx_type]; |
| 618 iscan = get_iscan_4x4(tx_type); | |
| 619 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) | 625 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) |
| 620 mode = xd->mi_8x8[0]->bmi[block].as_mode; | 626 mode = xd->mi_8x8[0]->bmi[block].as_mode; |
| 621 else | 627 else |
| 622 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 628 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
| 623 | 629 |
| 624 xoff = 4 * (block & twmask); | |
| 625 yoff = 4 * (block >> twl); | |
| 626 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | |
| 627 src = p->src.buf + yoff * p->src.stride + xoff; | |
| 628 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
| 629 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, | 630 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, |
| 630 dst, pd->dst.stride, dst, pd->dst.stride); | 631 x->skip_encode ? src : dst, |
| 631 vp9_subtract_block(4, 4, src_diff, bw * 4, | 632 x->skip_encode ? p->src.stride : pd->dst.stride, |
| 632 src, p->src.stride, dst, pd->dst.stride); | 633 dst, pd->dst.stride); |
| 633 if (tx_type != DCT_DCT) | 634 |
| 634 vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); | 635 if (!x->skip_recode) { |
| 635 else | 636 vp9_subtract_block(4, 4, src_diff, diff_stride, |
| 636 x->fwd_txm4x4(src_diff, coeff, bw * 4); | 637 src, p->src.stride, dst, pd->dst.stride); |
| 637 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, | 638 if (tx_type != DCT_DCT) |
| 638 p->quant_shift, qcoeff, dqcoeff, | 639 vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type); |
| 639 pd->dequant, p->zbin_extra, eob, scan, iscan); | 640 else |
| | 641 x->fwd_txm4x4(src_diff, coeff, diff_stride); |
| | 642 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, |
| | 643 p->quant_shift, qcoeff, dqcoeff, |
| | 644 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
| | 645 scan_order->iscan); |
| | 646 } |
| | 647 |
| 640 if (!x->skip_encode && *eob) { | 648 if (!x->skip_encode && *eob) { |
| 641 if (tx_type == DCT_DCT) | 649 if (tx_type == DCT_DCT) |
| 642 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 650 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
| 643 // which is significant (not just an optimization) for the lossless | 651 // which is significant (not just an optimization) for the lossless |
| 644 // case. | 652 // case. |
| 645 xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); | 653 xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); |
| 646 else | 654 else |
| 647 vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); | 655 vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); |
| 648 } | 656 } |
| 649 break; | 657 break; |
| (...skipping 10 matching lines...) |
| 660 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, | 668 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, |
| 661 &arg); | 669 &arg); |
| 662 } | 670 } |
| 663 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 671 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 664 MACROBLOCKD* const xd = &x->e_mbd; | 672 MACROBLOCKD* const xd = &x->e_mbd; |
| 665 struct optimize_ctx ctx; | 673 struct optimize_ctx ctx; |
| 666 struct encode_b_args arg = {x, &ctx}; | 674 struct encode_b_args arg = {x, &ctx}; |
| 667 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); | 675 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); |
| 668 } | 676 } |
| 669 | 677 |
| | 678 int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) { |
| | 679 MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi; |
| | 680 x->skip_encode = 0; |
| | 681 mbmi->mode = DC_PRED; |
| | 682 mbmi->ref_frame[0] = INTRA_FRAME; |
| | 683 mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16 |
| | 684 : TX_8X8) |
| | 685 : TX_4X4; |
| | 686 vp9_encode_intra_block_y(x, mbmi->sb_type); |
| | 687 return vp9_get_mb_ss(x->plane[0].src_diff); |
| | 688 } |
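A minimal sketch of the `txfrm_block_to_raster_xy()` index math that the new side of the diff pairs with `4 * (j * stride + i)` pointer offsets. This is a hedged reconstruction: `txfrm_block_to_raster_xy_sketch()` is a hypothetical stand-in that takes `bwl` directly, whereas the real helper derives it from the block-size lookup tables.

```c
#include <stdio.h>

/* Transform sizes as used in the diff (TX_4X4 .. TX_32X32). */
typedef enum { TX_4X4 = 0, TX_8X8 = 1, TX_16X16 = 2, TX_32X32 = 3 } TX_SIZE;

/* Stand-in for txfrm_block_to_raster_xy(): "block" counts 4x4 units in
 * raster order inside a plane block that is (1 << bwl) 4x4 units wide;
 * (*x, *y) locate the tx_size-sized transform block, also in 4x4 units. */
static void txfrm_block_to_raster_xy_sketch(int bwl, TX_SIZE tx_size,
                                            int block, int *x, int *y) {
  const int tx_cols_log2 = bwl - tx_size;      /* tx blocks per row, log2 */
  const int raster = block >> (tx_size << 1);  /* raster index of tx block */
  *x = (raster & ((1 << tx_cols_log2) - 1)) << tx_size;
  *y = (raster >> tx_cols_log2) << tx_size;
}

int main(void) {
  /* 64x64 plane block: bwl = 4, so diff_stride = 4 * (1 << 4) = 64.
   * A 16x16 transform block covers 16 4x4 units, so the third one in
   * raster order starts at block index 32. */
  int i, j;
  const int bwl = 4, diff_stride = 4 * (1 << bwl);
  txfrm_block_to_raster_xy_sketch(bwl, TX_16X16, 32, &i, &j);
  /* Same pointer math as the new code:
   *   src_diff = &p->src_diff[4 * (j * diff_stride + i)]; */
  printf("i=%d j=%d pixel offset=%d\n", i, j, 4 * (j * diff_stride + i));
  return 0;  /* prints: i=8 j=0 pixel offset=32 */
}
```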