| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 |
| 12 #include "./vp9_rtcd.h" |
| 11 #include "./vpx_config.h" | 13 #include "./vpx_config.h" |
| 14 |
| 15 #include "vpx_mem/vpx_mem.h" |
| 16 |
| 17 #include "vp9/common/vp9_idct.h" |
| 18 #include "vp9/common/vp9_reconinter.h" |
| 19 #include "vp9/common/vp9_reconintra.h" |
| 20 #include "vp9/common/vp9_systemdependent.h" |
| 21 |
| 22 #include "vp9/encoder/vp9_dct.h" |
| 12 #include "vp9/encoder/vp9_encodemb.h" | 23 #include "vp9/encoder/vp9_encodemb.h" |
| 13 #include "vp9/common/vp9_reconinter.h" | |
| 14 #include "vp9/encoder/vp9_quantize.h" | 24 #include "vp9/encoder/vp9_quantize.h" |
| 25 #include "vp9/encoder/vp9_rdopt.h" |
| 15 #include "vp9/encoder/vp9_tokenize.h" | 26 #include "vp9/encoder/vp9_tokenize.h" |
| 16 #include "vp9/common/vp9_reconintra.h" | |
| 17 #include "vpx_mem/vpx_mem.h" | |
| 18 #include "vp9/encoder/vp9_rdopt.h" | |
| 19 #include "vp9/common/vp9_systemdependent.h" | |
| 20 #include "vp9_rtcd.h" | |
| 21 | |
| 22 DECLARE_ALIGNED(16, extern const uint8_t, | |
| 23 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); | |
| 24 | 27 |
| 25 void vp9_subtract_block_c(int rows, int cols, | 28 void vp9_subtract_block_c(int rows, int cols, |
| 26 int16_t *diff_ptr, ptrdiff_t diff_stride, | 29 int16_t *diff_ptr, ptrdiff_t diff_stride, |
| 27 const uint8_t *src_ptr, ptrdiff_t src_stride, | 30 const uint8_t *src_ptr, ptrdiff_t src_stride, |
| 28 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { | 31 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { |
| 29 int r, c; | 32 int r, c; |
| 30 | 33 |
| 31 for (r = 0; r < rows; r++) { | 34 for (r = 0; r < rows; r++) { |
| 32 for (c = 0; c < cols; c++) | 35 for (c = 0; c < cols; c++) |
| 33 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; | 36 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; |
| 34 | 37 |
| 35 diff_ptr += diff_stride; | 38 diff_ptr += diff_stride; |
| 36 pred_ptr += pred_stride; | 39 pred_ptr += pred_stride; |
| 37 src_ptr += src_stride; | 40 src_ptr += src_stride; |
| 38 } | 41 } |
| 39 } | 42 } |
| 40 | 43 |
| 41 static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob, | |
| 42 int16_t *dqcoeff, uint8_t *dest, | |
| 43 int stride) { | |
| 44 if (eob <= 1) | |
| 45 xd->inv_txm4x4_1_add(dqcoeff, dest, stride); | |
| 46 else | |
| 47 xd->inv_txm4x4_add(dqcoeff, dest, stride); | |
| 48 } | |
| 49 | |
| 50 static void inverse_transform_b_8x8_add(int eob, | |
| 51 int16_t *dqcoeff, uint8_t *dest, | |
| 52 int stride) { | |
| 53 if (eob <= 1) | |
| 54 vp9_short_idct8x8_1_add(dqcoeff, dest, stride); | |
| 55 else if (eob <= 10) | |
| 56 vp9_short_idct10_8x8_add(dqcoeff, dest, stride); | |
| 57 else | |
| 58 vp9_short_idct8x8_add(dqcoeff, dest, stride); | |
| 59 } | |
| 60 | |
| 61 static void inverse_transform_b_16x16_add(int eob, | |
| 62 int16_t *dqcoeff, uint8_t *dest, | |
| 63 int stride) { | |
| 64 if (eob <= 1) | |
| 65 vp9_short_idct16x16_1_add(dqcoeff, dest, stride); | |
| 66 else if (eob <= 10) | |
| 67 vp9_short_idct10_16x16_add(dqcoeff, dest, stride); | |
| 68 else | |
| 69 vp9_short_idct16x16_add(dqcoeff, dest, stride); | |
| 70 } | |
| 71 | |
| 72 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { | 44 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { |
| 73 struct macroblock_plane *const p = &x->plane[plane]; | 45 struct macroblock_plane *const p = &x->plane[plane]; |
| 74 const MACROBLOCKD *const xd = &x->e_mbd; | 46 const MACROBLOCKD *const xd = &x->e_mbd; |
| 75 const struct macroblockd_plane *const pd = &xd->plane[plane]; | 47 const struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 76 const int bw = plane_block_width(bsize, pd); | 48 const int bw = plane_block_width(bsize, pd); |
| 77 const int bh = plane_block_height(bsize, pd); | 49 const int bh = plane_block_height(bsize, pd); |
| 78 | 50 |
| 79 vp9_subtract_block(bh, bw, p->src_diff, bw, | 51 vp9_subtract_block(bh, bw, p->src_diff, bw, |
| 80 p->src.buf, p->src.stride, | 52 p->src.buf, p->src.stride, |
| 81 pd->dst.buf, pd->dst.stride); | 53 pd->dst.buf, pd->dst.stride); |
| 82 } | 54 } |
| 83 | 55 |
| 84 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 56 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 85 subtract_plane(x, bsize, 0); | 57 subtract_plane(x, bsize, 0); |
| 86 } | 58 } |
| 87 | 59 |
| 88 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 60 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 89 int i; | 61 int i; |
| 90 | 62 |
| 91 for (i = 1; i < MAX_MB_PLANE; i++) | 63 for (i = 1; i < MAX_MB_PLANE; i++) |
| 92 subtract_plane(x, bsize, i); | 64 subtract_plane(x, bsize, i); |
| 93 } | 65 } |
| 94 | 66 |
| 95 void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { | 67 void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 96 vp9_subtract_sby(x, bsize); | 68 vp9_subtract_sby(x, bsize); |
| 97 vp9_subtract_sbuv(x, bsize); | 69 vp9_subtract_sbuv(x, bsize); |
| 98 } | 70 } |
| 99 | 71 |
| 100 | 72 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) |
| 101 #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) | |
| 102 typedef struct vp9_token_state vp9_token_state; | 73 typedef struct vp9_token_state vp9_token_state; |
| 103 | 74 |
| 104 struct vp9_token_state { | 75 struct vp9_token_state { |
| 105 int rate; | 76 int rate; |
| 106 int error; | 77 int error; |
| 107 int next; | 78 int next; |
| 108 signed char token; | 79 signed char token; |
| 109 short qc; | 80 short qc; |
| 110 }; | 81 }; |
| 111 | 82 |
| 112 // TODO: experiments to find optimal multiple numbers | 83 // TODO(jimbankoski): experiment to find optimal RD numbers. |
| 113 #define Y1_RD_MULT 4 | 84 #define Y1_RD_MULT 4 |
| 114 #define UV_RD_MULT 2 | 85 #define UV_RD_MULT 2 |
| 115 | 86 |
| 116 static const int plane_rd_mult[4] = { | 87 static const int plane_rd_mult[4] = { |
| 117 Y1_RD_MULT, | 88 Y1_RD_MULT, |
| 118 UV_RD_MULT, | 89 UV_RD_MULT, |
| 119 }; | 90 }; |
| 120 | 91 |
| 121 #define UPDATE_RD_COST()\ | 92 #define UPDATE_RD_COST()\ |
| 122 {\ | 93 {\ |
| (...skipping 17 matching lines...) |
| 140 token_cache[scan[idx]] = bak; | 111 token_cache[scan[idx]] = bak; |
| 141 return pt; | 112 return pt; |
| 142 } | 113 } |
| 143 | 114 |
| 144 static void optimize_b(MACROBLOCK *mb, | 115 static void optimize_b(MACROBLOCK *mb, |
| 145 int plane, int block, BLOCK_SIZE plane_bsize, | 116 int plane, int block, BLOCK_SIZE plane_bsize, |
| 146 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, | 117 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, |
| 147 TX_SIZE tx_size) { | 118 TX_SIZE tx_size) { |
| 148 MACROBLOCKD *const xd = &mb->e_mbd; | 119 MACROBLOCKD *const xd = &mb->e_mbd; |
| 149 struct macroblockd_plane *pd = &xd->plane[plane]; | 120 struct macroblockd_plane *pd = &xd->plane[plane]; |
| 150 const int ref = is_inter_block(&xd->this_mi->mbmi); | 121 const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); |
| 151 vp9_token_state tokens[1025][2]; | 122 vp9_token_state tokens[1025][2]; |
| 152 unsigned best_index[1025][2]; | 123 unsigned best_index[1025][2]; |
| 153 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); | 124 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); |
| 154 int16_t *qcoeff_ptr; | 125 int16_t *qcoeff_ptr; |
| 155 int16_t *dqcoeff_ptr; | 126 int16_t *dqcoeff_ptr; |
| 156 int eob = pd->eobs[block], final_eob, sz = 0; | 127 int eob = pd->eobs[block], final_eob, sz = 0; |
| 157 const int i0 = 0; | 128 const int i0 = 0; |
| 158 int rc, x, next, i; | 129 int rc, x, next, i; |
| 159 int64_t rdmult, rddiv, rd_cost0, rd_cost1; | 130 int64_t rdmult, rddiv, rd_cost0, rd_cost1; |
| 160 int rate0, rate1, error0, error1, t0, t1; | 131 int rate0, rate1, error0, error1, t0, t1; |
| 161 int best, band, pt; | 132 int best, band, pt; |
| 162 PLANE_TYPE type = pd->plane_type; | 133 PLANE_TYPE type = pd->plane_type; |
| 163 int err_mult = plane_rd_mult[type]; | 134 int err_mult = plane_rd_mult[type]; |
| 164 int default_eob; | 135 const int default_eob = 16 << (tx_size << 1); |
| 165 const int16_t *scan, *nb; | 136 const int16_t *scan, *nb; |
| 166 const int mul = 1 + (tx_size == TX_32X32); | 137 const int mul = 1 + (tx_size == TX_32X32); |
| 167 uint8_t token_cache[1024]; | 138 uint8_t token_cache[1024]; |
| 168 const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block); | 139 const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block); |
| 169 const int16_t *dequant_ptr = pd->dequant; | 140 const int16_t *dequant_ptr = pd->dequant; |
| 170 const uint8_t * band_translate; | 141 const uint8_t *const band_translate = get_band_translate(tx_size); |
| 171 | 142 |
| 172 assert((!type && !plane) || (type && plane)); | 143 assert((!type && !plane) || (type && plane)); |
| 173 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); | 144 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); |
| 174 qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); | 145 qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); |
| 175 switch (tx_size) { | 146 get_scan(xd, tx_size, type, ib, &scan, &nb); |
| 176 default: | |
| 177 case TX_4X4: | |
| 178 default_eob = 16; | |
| 179 scan = get_scan_4x4(get_tx_type_4x4(type, xd, ib)); | |
| 180 band_translate = vp9_coefband_trans_4x4; | |
| 181 break; | |
| 182 case TX_8X8: | |
| 183 scan = get_scan_8x8(get_tx_type_8x8(type, xd)); | |
| 184 default_eob = 64; | |
| 185 band_translate = vp9_coefband_trans_8x8plus; | |
| 186 break; | |
| 187 case TX_16X16: | |
| 188 scan = get_scan_16x16(get_tx_type_16x16(type, xd)); | |
| 189 default_eob = 256; | |
| 190 band_translate = vp9_coefband_trans_8x8plus; | |
| 191 break; | |
| 192 case TX_32X32: | |
| 193 scan = vp9_default_scan_32x32; | |
| 194 default_eob = 1024; | |
| 195 band_translate = vp9_coefband_trans_8x8plus; | |
| 196 break; | |
| 197 } | |
| 198 assert(eob <= default_eob); | 147 assert(eob <= default_eob); |
| 199 | 148 |
| 200 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ | 149 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ |
| 201 rdmult = mb->rdmult * err_mult; | 150 rdmult = mb->rdmult * err_mult; |
| 202 if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) | 151 if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) |
| 203 rdmult = (rdmult * 9) >> 4; | 152 rdmult = (rdmult * 9) >> 4; |
| 204 rddiv = mb->rddiv; | 153 rddiv = mb->rddiv; |
| 205 /* Initialize the sentinel node of the trellis. */ | 154 /* Initialize the sentinel node of the trellis. */ |
| 206 tokens[eob][0].rate = 0; | 155 tokens[eob][0].rate = 0; |
| 207 tokens[eob][0].error = 0; | 156 tokens[eob][0].error = 0; |
| 208 tokens[eob][0].next = default_eob; | 157 tokens[eob][0].next = default_eob; |
| 209 tokens[eob][0].token = DCT_EOB_TOKEN; | 158 tokens[eob][0].token = DCT_EOB_TOKEN; |
| 210 tokens[eob][0].qc = 0; | 159 tokens[eob][0].qc = 0; |
| 211 *(tokens[eob] + 1) = *(tokens[eob] + 0); | 160 *(tokens[eob] + 1) = *(tokens[eob] + 0); |
| 212 next = eob; | 161 next = eob; |
| 213 for (i = 0; i < eob; i++) | 162 for (i = 0; i < eob; i++) |
| 214 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ | 163 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ |
| 215 qcoeff_ptr[scan[i]]].token]; | 164 qcoeff_ptr[scan[i]]].token]; |
| 216 nb = vp9_get_coef_neighbors_handle(scan); | |
| 217 | 165 |
| 218 for (i = eob; i-- > i0;) { | 166 for (i = eob; i-- > i0;) { |
| 219 int base_bits, d2, dx; | 167 int base_bits, d2, dx; |
| 220 | 168 |
| 221 rc = scan[i]; | 169 rc = scan[i]; |
| 222 x = qcoeff_ptr[rc]; | 170 x = qcoeff_ptr[rc]; |
| 223 /* Only add a trellis state for non-zero coefficients. */ | 171 /* Only add a trellis state for non-zero coefficients. */ |
| 224 if (x) { | 172 if (x) { |
| 225 int shortcut = 0; | 173 int shortcut = 0; |
| 226 error0 = tokens[next][0].error; | 174 error0 = tokens[next][0].error; |
| (...skipping 78 matching lines...) |
| 305 d2 = dx * dx; | 253 d2 = dx * dx; |
| 306 } | 254 } |
| 307 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); | 255 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); |
| 308 tokens[i][1].error = d2 + (best ? error1 : error0); | 256 tokens[i][1].error = d2 + (best ? error1 : error0); |
| 309 tokens[i][1].next = next; | 257 tokens[i][1].next = next; |
| 310 tokens[i][1].token = best ? t1 : t0; | 258 tokens[i][1].token = best ? t1 : t0; |
| 311 tokens[i][1].qc = x; | 259 tokens[i][1].qc = x; |
| 312 best_index[i][1] = best; | 260 best_index[i][1] = best; |
| 313 /* Finally, make this the new head of the trellis. */ | 261 /* Finally, make this the new head of the trellis. */ |
| 314 next = i; | 262 next = i; |
| 315 } | 263 } else { |
| 316 /* There's no choice to make for a zero coefficient, so we don't | 264 /* There's no choice to make for a zero coefficient, so we don't |
| 317 * add a new trellis node, but we do need to update the costs. | 265 * add a new trellis node, but we do need to update the costs. |
| 318 */ | 266 */ |
| 319 else { | |
| 320 band = get_coef_band(band_translate, i + 1); | 267 band = get_coef_band(band_translate, i + 1); |
| 321 t0 = tokens[next][0].token; | 268 t0 = tokens[next][0].token; |
| 322 t1 = tokens[next][1].token; | 269 t1 = tokens[next][1].token; |
| 323 /* Update the cost of each path if we're past the EOB token. */ | 270 /* Update the cost of each path if we're past the EOB token. */ |
| 324 if (t0 != DCT_EOB_TOKEN) { | 271 if (t0 != DCT_EOB_TOKEN) { |
| 325 tokens[next][0].rate += | 272 tokens[next][0].rate += |
| 326 mb->token_costs[tx_size][type][ref][band][1][0][t0]; | 273 mb->token_costs[tx_size][type][ref][band][1][0][t0]; |
| 327 tokens[next][0].token = ZERO_TOKEN; | 274 tokens[next][0].token = ZERO_TOKEN; |
| 328 } | 275 } |
| 329 if (t1 != DCT_EOB_TOKEN) { | 276 if (t1 != DCT_EOB_TOKEN) { |
| (...skipping 48 matching lines...) |
| 378 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); | 325 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); |
| 379 } | 326 } |
| 380 | 327 |
| 381 static void optimize_init_b(int plane, BLOCK_SIZE bsize, | 328 static void optimize_init_b(int plane, BLOCK_SIZE bsize, |
| 382 struct encode_b_args *args) { | 329 struct encode_b_args *args) { |
| 383 const MACROBLOCKD *xd = &args->x->e_mbd; | 330 const MACROBLOCKD *xd = &args->x->e_mbd; |
| 384 const struct macroblockd_plane* const pd = &xd->plane[plane]; | 331 const struct macroblockd_plane* const pd = &xd->plane[plane]; |
| 385 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); | 332 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); |
| 386 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; | 333 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; |
| 387 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; | 334 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; |
| 388 const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; | 335 const MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; |
| 389 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size; | 336 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size; |
| 390 int i; | |
| 391 | 337 |
| 392 switch (tx_size) { | 338 vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane], |
| 393 case TX_4X4: | 339 pd->above_context, pd->left_context, |
| 394 vpx_memcpy(args->ctx->ta[plane], pd->above_context, | 340 num_4x4_w, num_4x4_h); |
| 395 sizeof(ENTROPY_CONTEXT) * num_4x4_w); | |
| 396 vpx_memcpy(args->ctx->tl[plane], pd->left_context, | |
| 397 sizeof(ENTROPY_CONTEXT) * num_4x4_h); | |
| 398 break; | |
| 399 case TX_8X8: | |
| 400 for (i = 0; i < num_4x4_w; i += 2) | |
| 401 args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i]; | |
| 402 for (i = 0; i < num_4x4_h; i += 2) | |
| 403 args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i]; | |
| 404 break; | |
| 405 case TX_16X16: | |
| 406 for (i = 0; i < num_4x4_w; i += 4) | |
| 407 args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i]; | |
| 408 for (i = 0; i < num_4x4_h; i += 4) | |
| 409 args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i]; | |
| 410 break; | |
| 411 case TX_32X32: | |
| 412 for (i = 0; i < num_4x4_w; i += 8) | |
| 413 args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i]; | |
| 414 for (i = 0; i < num_4x4_h; i += 8) | |
| 415 args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i]; | |
| 416 break; | |
| 417 default: | |
| 418 assert(0); | |
| 419 } | |
| 420 } | 341 } |
| 421 | 342 |
| 422 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, | 343 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, |
| 423 TX_SIZE tx_size, void *arg) { | 344 TX_SIZE tx_size, void *arg) { |
| 424 struct encode_b_args* const args = arg; | 345 struct encode_b_args* const args = arg; |
| 425 MACROBLOCK* const x = args->x; | 346 MACROBLOCK* const x = args->x; |
| 426 MACROBLOCKD* const xd = &x->e_mbd; | 347 MACROBLOCKD* const xd = &x->e_mbd; |
| 427 struct macroblock_plane *const p = &x->plane[plane]; | 348 struct macroblock_plane *const p = &x->plane[plane]; |
| 428 struct macroblockd_plane *const pd = &xd->plane[plane]; | 349 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 429 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 350 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
| 430 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 351 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); |
| 431 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 352 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 432 const int16_t *scan, *iscan; | 353 const int16_t *scan, *iscan; |
| 433 uint16_t *eob = &pd->eobs[block]; | 354 uint16_t *eob = &pd->eobs[block]; |
| 434 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 355 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; |
| 435 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 356 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; |
| 436 int xoff, yoff; | 357 int xoff, yoff; |
| 437 int16_t *src_diff; | 358 int16_t *src_diff; |
| 438 | 359 |
| 439 switch (tx_size) { | 360 switch (tx_size) { |
| 440 case TX_32X32: | 361 case TX_32X32: |
| 441 scan = vp9_default_scan_32x32; | 362 scan = vp9_default_scan_32x32; |
| 442 iscan = vp9_default_iscan_32x32; | 363 iscan = vp9_default_iscan_32x32; |
| 443 block >>= 6; | 364 block >>= 6; |
| 444 xoff = 32 * (block & twmask); | 365 xoff = 32 * (block & twmask); |
| 445 yoff = 32 * (block >> twl); | 366 yoff = 32 * (block >> twl); |
| 446 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 367 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 447 if (x->use_lp32x32fdct) | 368 if (x->use_lp32x32fdct) |
| 448 vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8); | 369 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); |
| 449 else | 370 else |
| 450 vp9_short_fdct32x32(src_diff, coeff, bw * 8); | 371 vp9_fdct32x32(src_diff, coeff, bw * 4); |
| 451 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 372 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
| 452 p->quant, p->quant_shift, qcoeff, dqcoeff, | 373 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 453 pd->dequant, p->zbin_extra, eob, scan, iscan); | 374 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 454 break; | 375 break; |
| 455 case TX_16X16: | 376 case TX_16X16: |
| 456 scan = vp9_default_scan_16x16; | 377 scan = vp9_default_scan_16x16; |
| 457 iscan = vp9_default_iscan_16x16; | 378 iscan = vp9_default_iscan_16x16; |
| 458 block >>= 4; | 379 block >>= 4; |
| 459 xoff = 16 * (block & twmask); | 380 xoff = 16 * (block & twmask); |
| 460 yoff = 16 * (block >> twl); | 381 yoff = 16 * (block >> twl); |
| 461 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 382 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 462 x->fwd_txm16x16(src_diff, coeff, bw * 8); | 383 vp9_fdct16x16(src_diff, coeff, bw * 4); |
| 463 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 384 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
| 464 p->quant, p->quant_shift, qcoeff, dqcoeff, | 385 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 465 pd->dequant, p->zbin_extra, eob, scan, iscan); | 386 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 466 break; | 387 break; |
| 467 case TX_8X8: | 388 case TX_8X8: |
| 468 scan = vp9_default_scan_8x8; | 389 scan = vp9_default_scan_8x8; |
| 469 iscan = vp9_default_iscan_8x8; | 390 iscan = vp9_default_iscan_8x8; |
| 470 block >>= 2; | 391 block >>= 2; |
| 471 xoff = 8 * (block & twmask); | 392 xoff = 8 * (block & twmask); |
| 472 yoff = 8 * (block >> twl); | 393 yoff = 8 * (block >> twl); |
| 473 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 394 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 474 x->fwd_txm8x8(src_diff, coeff, bw * 8); | 395 vp9_fdct8x8(src_diff, coeff, bw * 4); |
| 475 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, | 396 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, |
| 476 p->quant, p->quant_shift, qcoeff, dqcoeff, | 397 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 477 pd->dequant, p->zbin_extra, eob, scan, iscan); | 398 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 478 break; | 399 break; |
| 479 case TX_4X4: | 400 case TX_4X4: |
| 480 scan = vp9_default_scan_4x4; | 401 scan = vp9_default_scan_4x4; |
| 481 iscan = vp9_default_iscan_4x4; | 402 iscan = vp9_default_iscan_4x4; |
| 482 xoff = 4 * (block & twmask); | 403 xoff = 4 * (block & twmask); |
| 483 yoff = 4 * (block >> twl); | 404 yoff = 4 * (block >> twl); |
| 484 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 405 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 485 x->fwd_txm4x4(src_diff, coeff, bw * 8); | 406 x->fwd_txm4x4(src_diff, coeff, bw * 4); |
| 486 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, | 407 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, |
| 487 p->quant, p->quant_shift, qcoeff, dqcoeff, | 408 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 488 pd->dequant, p->zbin_extra, eob, scan, iscan); | 409 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 489 break; | 410 break; |
| 490 default: | 411 default: |
| 491 assert(0); | 412 assert(0); |
| 492 } | 413 } |
| 493 } | 414 } |
| 494 | 415 |
| 495 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, | 416 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, |
| 496 TX_SIZE tx_size, void *arg) { | 417 TX_SIZE tx_size, void *arg) { |
| 497 struct encode_b_args *const args = arg; | 418 struct encode_b_args *const args = arg; |
| 498 MACROBLOCK *const x = args->x; | 419 MACROBLOCK *const x = args->x; |
| 499 MACROBLOCKD *const xd = &x->e_mbd; | 420 MACROBLOCKD *const xd = &x->e_mbd; |
| 421 struct optimize_ctx *const ctx = args->ctx; |
| 500 struct macroblockd_plane *const pd = &xd->plane[plane]; | 422 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 501 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, | 423 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, |
| 502 block); | 424 block); |
| 503 | 425 |
| 504 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 426 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 505 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, | 427 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, |
| 506 pd->dst.buf, pd->dst.stride); | 428 pd->dst.buf, pd->dst.stride); |
| 429 |
| 430 // TODO(jingning): per transformed block zero forcing only enabled for |
| 431 // luma component. will integrate chroma components as well. |
| 432 if (x->zcoeff_blk[tx_size][block] && plane == 0) { |
| 433 int x, y; |
| 434 pd->eobs[block] = 0; |
| 435 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); |
| 436 ctx->ta[plane][x] = 0; |
| 437 ctx->tl[plane][y] = 0; |
| 438 return; |
| 439 } |
| 440 |
| 507 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); | 441 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
| 508 | 442 |
| 509 if (x->optimize) | 443 if (x->optimize) |
| 510 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); | 444 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); |
| 511 | 445 |
| 512 if (x->skip_encode || pd->eobs[block] == 0) | 446 if (x->skip_encode || pd->eobs[block] == 0) |
| 513 return; | 447 return; |
| 514 | 448 |
| 515 switch (tx_size) { | 449 switch (tx_size) { |
| 516 case TX_32X32: | 450 case TX_32X32: |
| 517 vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); | 451 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
| 518 break; | 452 break; |
| 519 case TX_16X16: | 453 case TX_16X16: |
| 520 inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst, | 454 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
| 521 pd->dst.stride); | |
| 522 break; | 455 break; |
| 523 case TX_8X8: | 456 case TX_8X8: |
| 524 inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst, | 457 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
| 525 pd->dst.stride); | |
| 526 break; | 458 break; |
| 527 case TX_4X4: | 459 case TX_4X4: |
| 528 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 460 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
| 529 // which is significant (not just an optimization) for the lossless | 461 // which is significant (not just an optimization) for the lossless |
| 530 // case. | 462 // case. |
| 531 inverse_transform_b_4x4_add(xd, pd->eobs[block], dqcoeff, | 463 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
| 532 dst, pd->dst.stride); | |
| 533 break; | 464 break; |
| 534 default: | 465 default: |
| 535 assert(!"Invalid transform size"); | 466 assert(!"Invalid transform size"); |
| 536 } | 467 } |
| 537 } | 468 } |
| 538 | 469 |
| 470 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, |
| 471 TX_SIZE tx_size, void *arg) { |
| 472 struct encode_b_args *const args = arg; |
| 473 MACROBLOCK *const x = args->x; |
| 474 MACROBLOCKD *const xd = &x->e_mbd; |
| 475 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 476 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, |
| 477 block); |
| 478 |
| 479 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 480 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, |
| 481 pd->dst.buf, pd->dst.stride); |
| 482 |
| 483 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
| 484 |
| 485 if (pd->eobs[block] == 0) |
| 486 return; |
| 487 |
| 488 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
| 489 } |
| 490 |
| 539 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 491 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 540 MACROBLOCKD *const xd = &x->e_mbd; | 492 MACROBLOCKD *const xd = &x->e_mbd; |
| 541 struct optimize_ctx ctx; | 493 struct optimize_ctx ctx; |
| 542 struct encode_b_args arg = {x, &ctx}; | 494 struct encode_b_args arg = {x, &ctx}; |
| 543 | 495 |
| 544 vp9_subtract_sby(x, bsize); | 496 vp9_subtract_sby(x, bsize); |
| 545 if (x->optimize) | 497 if (x->optimize) |
| 546 optimize_init_b(0, bsize, &arg); | 498 optimize_init_b(0, bsize, &arg); |
| 547 | 499 |
| 548 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg); | 500 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg); |
| 549 } | 501 } |
| 550 | 502 |
| 551 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { | 503 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 552 MACROBLOCKD *const xd = &x->e_mbd; | 504 MACROBLOCKD *const xd = &x->e_mbd; |
| 553 struct optimize_ctx ctx; | 505 struct optimize_ctx ctx; |
| 554 struct encode_b_args arg = {x, &ctx}; | 506 struct encode_b_args arg = {x, &ctx}; |
| 555 | 507 |
| 556 vp9_subtract_sb(x, bsize); | 508 vp9_subtract_sb(x, bsize); |
| 557 | 509 |
| 558 if (x->optimize) { | 510 if (x->optimize) { |
| 559 int i; | 511 int i; |
| 560 for (i = 0; i < MAX_MB_PLANE; ++i) | 512 for (i = 0; i < MAX_MB_PLANE; ++i) |
| 561 optimize_init_b(i, bsize, &arg); | 513 optimize_init_b(i, bsize, &arg); |
| 562 } | 514 } |
| 563 | 515 |
| 564 foreach_transformed_block(xd, bsize, encode_block, &arg); | 516 foreach_transformed_block(xd, bsize, encode_block, &arg); |
| 565 } | 517 } |
| 566 | 518 |
| 567 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, | 519 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, |
| 568 TX_SIZE tx_size, void *arg) { | 520 TX_SIZE tx_size, void *arg) { |
| 569 struct encode_b_args* const args = arg; | 521 struct encode_b_args* const args = arg; |
| 570 MACROBLOCK *const x = args->x; | 522 MACROBLOCK *const x = args->x; |
| 571 MACROBLOCKD *const xd = &x->e_mbd; | 523 MACROBLOCKD *const xd = &x->e_mbd; |
| 572 MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; | 524 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; |
| 573 struct macroblock_plane *const p = &x->plane[plane]; | 525 struct macroblock_plane *const p = &x->plane[plane]; |
| 574 struct macroblockd_plane *const pd = &xd->plane[plane]; | 526 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 575 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 527 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
| 576 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 528 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); |
| 577 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 529 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 578 const int16_t *scan, *iscan; | 530 const int16_t *scan, *iscan; |
| 579 TX_TYPE tx_type; | 531 TX_TYPE tx_type; |
| 580 MB_PREDICTION_MODE mode; | 532 MB_PREDICTION_MODE mode; |
| 581 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 533 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; |
| 582 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 534 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; |
| (...skipping 17 matching lines...) |
| 600 xoff = 32 * (block & twmask); | 552 xoff = 32 * (block & twmask); |
| 601 yoff = 32 * (block >> twl); | 553 yoff = 32 * (block >> twl); |
| 602 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 554 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
| 603 src = p->src.buf + yoff * p->src.stride + xoff; | 555 src = p->src.buf + yoff * p->src.stride + xoff; |
| 604 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 556 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 605 vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, | 557 vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, |
| 606 dst, pd->dst.stride, dst, pd->dst.stride); | 558 dst, pd->dst.stride, dst, pd->dst.stride); |
| 607 vp9_subtract_block(32, 32, src_diff, bw * 4, | 559 vp9_subtract_block(32, 32, src_diff, bw * 4, |
| 608 src, p->src.stride, dst, pd->dst.stride); | 560 src, p->src.stride, dst, pd->dst.stride); |
| 609 if (x->use_lp32x32fdct) | 561 if (x->use_lp32x32fdct) |
| 610 vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8); | 562 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); |
| 611 else | 563 else |
| 612 vp9_short_fdct32x32(src_diff, coeff, bw * 8); | 564 vp9_fdct32x32(src_diff, coeff, bw * 4); |
| 613 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 565 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
| 614 p->quant, p->quant_shift, qcoeff, dqcoeff, | 566 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 615 pd->dequant, p->zbin_extra, eob, scan, iscan); | 567 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 616 if (!x->skip_encode && *eob) | 568 if (!x->skip_encode && *eob) |
| 617 vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); | 569 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); |
| 618 break; | 570 break; |
| 619 case TX_16X16: | 571 case TX_16X16: |
| 620 tx_type = get_tx_type_16x16(pd->plane_type, xd); | 572 tx_type = get_tx_type_16x16(pd->plane_type, xd); |
| 621 scan = get_scan_16x16(tx_type); | 573 scan = get_scan_16x16(tx_type); |
| 622 iscan = get_iscan_16x16(tx_type); | 574 iscan = get_iscan_16x16(tx_type); |
| 623 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 575 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
| 624 block >>= 4; | 576 block >>= 4; |
| 625 xoff = 16 * (block & twmask); | 577 xoff = 16 * (block & twmask); |
| 626 yoff = 16 * (block >> twl); | 578 yoff = 16 * (block >> twl); |
| 627 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 579 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
| 628 src = p->src.buf + yoff * p->src.stride + xoff; | 580 src = p->src.buf + yoff * p->src.stride + xoff; |
| 629 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 581 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 630 vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, | 582 vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, |
| 631 dst, pd->dst.stride, dst, pd->dst.stride); | 583 dst, pd->dst.stride, dst, pd->dst.stride); |
| 632 vp9_subtract_block(16, 16, src_diff, bw * 4, | 584 vp9_subtract_block(16, 16, src_diff, bw * 4, |
| 633 src, p->src.stride, dst, pd->dst.stride); | 585 src, p->src.stride, dst, pd->dst.stride); |
| 634 if (tx_type != DCT_DCT) | 586 vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); |
| 635 vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type); | |
| 636 else | |
| 637 x->fwd_txm16x16(src_diff, coeff, bw * 8); | |
| 638 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 587 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
| 639 p->quant, p->quant_shift, qcoeff, dqcoeff, | 588 p->quant, p->quant_shift, qcoeff, dqcoeff, |
| 640 pd->dequant, p->zbin_extra, eob, scan, iscan); | 589 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 641 if (!x->skip_encode && *eob) { | 590 if (!x->skip_encode && *eob) |
| 642 if (tx_type == DCT_DCT) | 591 vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
| 643 inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride); | |
| 644 else | |
| 645 vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type); | |
| 646 } | |
| 647 break; | 592 break; |
| 648 case TX_8X8: | 593 case TX_8X8: |
| 649 tx_type = get_tx_type_8x8(pd->plane_type, xd); | 594 tx_type = get_tx_type_8x8(pd->plane_type, xd); |
| 650 scan = get_scan_8x8(tx_type); | 595 scan = get_scan_8x8(tx_type); |
| 651 iscan = get_iscan_8x8(tx_type); | 596 iscan = get_iscan_8x8(tx_type); |
| 652 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 597 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
| 653 block >>= 2; | 598 block >>= 2; |
| 654 xoff = 8 * (block & twmask); | 599 xoff = 8 * (block & twmask); |
| 655 yoff = 8 * (block >> twl); | 600 yoff = 8 * (block >> twl); |
| 656 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 601 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
| 657 src = p->src.buf + yoff * p->src.stride + xoff; | 602 src = p->src.buf + yoff * p->src.stride + xoff; |
| 658 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 603 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 659 vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, | 604 vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, |
| 660 dst, pd->dst.stride, dst, pd->dst.stride); | 605 dst, pd->dst.stride, dst, pd->dst.stride); |
| 661 vp9_subtract_block(8, 8, src_diff, bw * 4, | 606 vp9_subtract_block(8, 8, src_diff, bw * 4, |
| 662 src, p->src.stride, dst, pd->dst.stride); | 607 src, p->src.stride, dst, pd->dst.stride); |
| 663 if (tx_type != DCT_DCT) | 608 vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); |
| 664 vp9_short_fht8x8(src_diff, coeff, bw * 4, tx_type); | |
| 665 else | |
| 666 x->fwd_txm8x8(src_diff, coeff, bw * 8); | |
| 667 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, | 609 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, |
| 668 p->quant_shift, qcoeff, dqcoeff, | 610 p->quant_shift, qcoeff, dqcoeff, |
| 669 pd->dequant, p->zbin_extra, eob, scan, iscan); | 611 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 670 if (!x->skip_encode && *eob) { | 612 if (!x->skip_encode && *eob) |
| 671 if (tx_type == DCT_DCT) | 613 vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
| 672 inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride); | |
| 673 else | |
| 674 vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type); | |
| 675 } | |
| 676 break; | 614 break; |
| 677 case TX_4X4: | 615 case TX_4X4: |
| 678 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); | 616 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); |
| 679 scan = get_scan_4x4(tx_type); | 617 scan = get_scan_4x4(tx_type); |
| 680 iscan = get_iscan_4x4(tx_type); | 618 iscan = get_iscan_4x4(tx_type); |
| 681 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) | 619 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) |
| 682 mode = xd->this_mi->bmi[block].as_mode; | 620 mode = xd->mi_8x8[0]->bmi[block].as_mode; |
| 683 else | 621 else |
| 684 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 622 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
| 685 | 623 |
| 686 xoff = 4 * (block & twmask); | 624 xoff = 4 * (block & twmask); |
| 687 yoff = 4 * (block >> twl); | 625 yoff = 4 * (block >> twl); |
| 688 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 626 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
| 689 src = p->src.buf + yoff * p->src.stride + xoff; | 627 src = p->src.buf + yoff * p->src.stride + xoff; |
| 690 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 628 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
| 691 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, | 629 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, |
| 692 dst, pd->dst.stride, dst, pd->dst.stride); | 630 dst, pd->dst.stride, dst, pd->dst.stride); |
| 693 vp9_subtract_block(4, 4, src_diff, bw * 4, | 631 vp9_subtract_block(4, 4, src_diff, bw * 4, |
| 694 src, p->src.stride, dst, pd->dst.stride); | 632 src, p->src.stride, dst, pd->dst.stride); |
| 695 if (tx_type != DCT_DCT) | 633 if (tx_type != DCT_DCT) |
| 696 vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); | 634 vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); |
| 697 else | 635 else |
| 698 x->fwd_txm4x4(src_diff, coeff, bw * 8); | 636 x->fwd_txm4x4(src_diff, coeff, bw * 4); |
| 699 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, | 637 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, |
| 700 p->quant_shift, qcoeff, dqcoeff, | 638 p->quant_shift, qcoeff, dqcoeff, |
| 701 pd->dequant, p->zbin_extra, eob, scan, iscan); | 639 pd->dequant, p->zbin_extra, eob, scan, iscan); |
| 702 if (!x->skip_encode && *eob) { | 640 if (!x->skip_encode && *eob) { |
| 703 if (tx_type == DCT_DCT) | 641 if (tx_type == DCT_DCT) |
| 704 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 642 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
| 705 // which is significant (not just an optimization) for the lossless | 643 // which is significant (not just an optimization) for the lossless |
| 706 // case. | 644 // case. |
| 707 inverse_transform_b_4x4_add(xd, *eob, dqcoeff, dst, pd->dst.stride); | 645 xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); |
| 708 else | 646 else |
| 709 vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type); | 647 vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); |
| 710 } | 648 } |
| 711 break; | 649 break; |
| 712 default: | 650 default: |
| 713 assert(0); | 651 assert(0); |
| 714 } | 652 } |
| 715 } | 653 } |
| 716 | 654 |
| 717 void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) { | 655 void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 718 MACROBLOCKD* const xd = &x->e_mbd; | 656 MACROBLOCKD* const xd = &x->e_mbd; |
| 719 struct optimize_ctx ctx; | 657 struct optimize_ctx ctx; |
| 720 struct encode_b_args arg = {x, &ctx}; | 658 struct encode_b_args arg = {x, &ctx}; |
| 721 | 659 |
| 722 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, | 660 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, |
| 723 &arg); | 661 &arg); |
| 724 } | 662 } |
| 725 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 663 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
| 726 MACROBLOCKD* const xd = &x->e_mbd; | 664 MACROBLOCKD* const xd = &x->e_mbd; |
| 727 struct optimize_ctx ctx; | 665 struct optimize_ctx ctx; |
| 728 struct encode_b_args arg = {x, &ctx}; | 666 struct encode_b_args arg = {x, &ctx}; |
| 729 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); | 667 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); |
| 730 } | 668 } |
| 731 | 669 |
| OLD | NEW |