Index: source/libvpx/vp9/encoder/vp9_encodemb.c |
=================================================================== |
--- source/libvpx/vp9/encoder/vp9_encodemb.c (revision 292608) |
+++ source/libvpx/vp9/encoder/vp9_encodemb.c (working copy) |
@@ -51,6 +51,29 @@ |
} |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+// Per-sample subtraction for one rows x cols block: diff = src - pred. |
+// src8/pred8 are read as uint16_t via CONVERT_TO_SHORTPTR; bd is unused. |
+void vp9_highbd_subtract_block_c(int rows, int cols, |
+                                 int16_t *diff, ptrdiff_t diff_stride, |
+                                 const uint8_t *src8, ptrdiff_t src_stride, |
+                                 const uint8_t *pred8, ptrdiff_t pred_stride, |
+                                 int bd) { |
+  const uint16_t *src_row = CONVERT_TO_SHORTPTR(src8); |
+  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8); |
+  int row; |
+  (void) bd; |
+  for (row = 0; row < rows; ++row) { |
+    int col; |
+    for (col = 0; col < cols; ++col) |
+      diff[col] = src_row[col] - pred_row[col]; |
+    diff += diff_stride; |
+    src_row += src_stride; |
+    pred_row += pred_stride; |
+  } |
+} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { |
struct macroblock_plane *const p = &x->plane[plane]; |
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; |
@@ -58,6 +81,14 @@ |
const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ vp9_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, |
+ p->src.stride, pd->dst.buf, pd->dst.stride, |
+ x->e_mbd.bd); |
+ return; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, |
pd->dst.buf, pd->dst.stride); |
} |
@@ -124,6 +155,8 @@ |
int64_t rd_cost0, rd_cost1; |
int rate0, rate1, error0, error1, t0, t1; |
int best, band, pt, i, final_eob; |
+ const TOKENVALUE *dct_value_tokens; |
+ const int16_t *dct_value_cost; |
assert((!type && !plane) || (type && plane)); |
assert(eob <= default_eob); |
@@ -140,9 +173,24 @@ |
tokens[eob][0].qc = 0; |
tokens[eob][1] = tokens[eob][0]; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->bd == 12) { |
+ dct_value_tokens = vp9_dct_value_tokens_high12_ptr; |
+ dct_value_cost = vp9_dct_value_cost_high12_ptr; |
+ } else if (xd->bd == 10) { |
+ dct_value_tokens = vp9_dct_value_tokens_high10_ptr; |
+ dct_value_cost = vp9_dct_value_cost_high10_ptr; |
+ } else { |
+ dct_value_tokens = vp9_dct_value_tokens_ptr; |
+ dct_value_cost = vp9_dct_value_cost_ptr; |
+ } |
+#else |
+ dct_value_tokens = vp9_dct_value_tokens_ptr; |
+ dct_value_cost = vp9_dct_value_cost_ptr; |
+#endif |
for (i = 0; i < eob; i++) |
token_cache[scan[i]] = |
- vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token]; |
+ vp9_pt_energy_class[dct_value_tokens[qcoeff[scan[i]]].token]; |
for (i = eob; i-- > 0;) { |
int base_bits, d2, dx; |
@@ -156,7 +204,7 @@ |
/* Evaluate the first possibility for this state. */ |
rate0 = tokens[next][0].rate; |
rate1 = tokens[next][1].rate; |
- t0 = (vp9_dct_value_tokens_ptr + x)->token; |
+ t0 = (dct_value_tokens + x)->token; |
/* Consider both possible successor states. */ |
if (next < default_eob) { |
band = band_translate[i + 1]; |
@@ -169,8 +217,13 @@ |
UPDATE_RD_COST(); |
/* And pick the best. */ |
best = rd_cost1 < rd_cost0; |
- base_bits = vp9_dct_value_cost_ptr[x]; |
+ base_bits = dct_value_cost[x]; |
dx = mul * (dqcoeff[rc] - coeff[rc]); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ dx >>= xd->bd - 8; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
d2 = dx * dx; |
tokens[i][0].rate = base_bits + (best ? rate1 : rate0); |
tokens[i][0].error = d2 + (best ? error1 : error0); |
@@ -203,7 +256,7 @@ |
t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; |
t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; |
} else { |
- t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; |
+ t0 = t1 = (dct_value_tokens + x)->token; |
} |
if (next < default_eob) { |
band = band_translate[i + 1]; |
@@ -222,10 +275,18 @@ |
UPDATE_RD_COST(); |
/* And pick the best. */ |
best = rd_cost1 < rd_cost0; |
- base_bits = vp9_dct_value_cost_ptr[x]; |
+ base_bits = dct_value_cost[x]; |
if (shortcut) { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz; |
+ } else { |
+ dx -= (dequant_ptr[rc != 0] + sz) ^ sz; |
+ } |
+#else |
dx -= (dequant_ptr[rc != 0] + sz) ^ sz; |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
d2 = dx * dx; |
} |
tokens[i][1].rate = base_bits + (best ? rate1 : rate0); |
@@ -303,14 +364,14 @@ |
} |
#if CONFIG_VP9_HIGHBITDEPTH |
-static INLINE void high_fdct32x32(int rd_transform, const int16_t *src, |
- tran_low_t *dst, int src_stride) { |
+static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, |
+ tran_low_t *dst, int src_stride) { |
if (rd_transform) |
- vp9_high_fdct32x32_rd(src, dst, src_stride); |
+ vp9_highbd_fdct32x32_rd(src, dst, src_stride); |
else |
- vp9_high_fdct32x32(src, dst, src_stride); |
+ vp9_highbd_fdct32x32(src, dst, src_stride); |
} |
-#endif |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, |
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { |
@@ -328,6 +389,45 @@ |
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ switch (tx_size) { |
+ case TX_32X32: |
+ highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, |
+ p->round_fp, p->quant_fp, p->quant_shift, |
+ qcoeff, dqcoeff, pd->dequant, |
+ p->zbin_extra, eob, scan_order->scan, |
+ scan_order->iscan); |
+ break; |
+ case TX_16X16: |
+ vp9_highbd_fdct16x16(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, |
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ break; |
+ case TX_8X8: |
+ vp9_highbd_fdct8x8(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, |
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ break; |
+ case TX_4X4: |
+ x->fwd_txm4x4(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, |
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ break; |
+ default: |
+ assert(0); |
+ } |
+ return; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
switch (tx_size) { |
case TX_32X32: |
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
@@ -379,6 +479,40 @@ |
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ switch (tx_size) { |
+ case TX_32X32: |
+ vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round, |
+ p->quant_fp[0], qcoeff, dqcoeff, |
+ pd->dequant[0], eob); |
+ break; |
+ case TX_16X16: |
+ vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_dc(coeff, x->skip_block, p->round, |
+ p->quant_fp[0], qcoeff, dqcoeff, |
+ pd->dequant[0], eob); |
+ break; |
+ case TX_8X8: |
+ vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_dc(coeff, x->skip_block, p->round, |
+ p->quant_fp[0], qcoeff, dqcoeff, |
+ pd->dequant[0], eob); |
+ break; |
+ case TX_4X4: |
+ x->fwd_txm4x4(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_dc(coeff, x->skip_block, p->round, |
+ p->quant_fp[0], qcoeff, dqcoeff, |
+ pd->dequant[0], eob); |
+ break; |
+ default: |
+ assert(0); |
+ } |
+ return; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
switch (tx_size) { |
case TX_32X32: |
vp9_fdct32x32_1(src_diff, coeff, diff_stride); |
@@ -426,6 +560,44 @@ |
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ switch (tx_size) { |
+ case TX_32X32: |
+ highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, |
+ p->round, p->quant, p->quant_shift, qcoeff, |
+ dqcoeff, pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ break; |
+ case TX_16X16: |
+ vp9_highbd_fdct16x16(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
+ p->quant, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ break; |
+ case TX_8X8: |
+ vp9_highbd_fdct8x8(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, |
+ p->quant, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ break; |
+ case TX_4X4: |
+ x->fwd_txm4x4(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, |
+ p->quant, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ break; |
+ default: |
+ assert(0); |
+ } |
+ return; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
switch (tx_size) { |
case TX_32X32: |
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
@@ -520,6 +692,34 @@ |
if (x->skip_encode || p->eobs[block] == 0) |
return; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ switch (tx_size) { |
+ case TX_32X32: |
+ vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride, |
+ p->eobs[block], xd->bd); |
+ break; |
+ case TX_16X16: |
+ vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride, |
+ p->eobs[block], xd->bd); |
+ break; |
+ case TX_8X8: |
+ vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride, |
+ p->eobs[block], xd->bd); |
+ break; |
+ case TX_4X4: |
+ // This is like vp9_short_idct4x4, but has a special case around |
+ // eob <= 1 that is significant (not just an optimization) for the |
+ // lossless case. |
+ x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, |
+ p->eobs[block], xd->bd); |
+ break; |
+ default: |
+ assert(0 && "Invalid transform size"); |
+ } |
+ return; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
switch (tx_size) { |
case TX_32X32: |
@@ -557,8 +757,15 @@ |
vp9_xform_quant(x, plane, block, plane_bsize, tx_size); |
- if (p->eobs[block] > 0) |
+ if (p->eobs[block] > 0) { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd); |
+ return; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
+ } |
} |
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { |
@@ -609,7 +816,7 @@ |
const scan_order *scan_order; |
TX_TYPE tx_type; |
PREDICTION_MODE mode; |
- const int bwl = b_width_log2(plane_bsize); |
+ const int bwl = b_width_log2_lookup[plane_bsize]; |
const int diff_stride = 4 * (1 << bwl); |
uint8_t *src, *dst; |
int16_t *src_diff; |
@@ -622,6 +829,117 @@ |
src = &p->src.buf[4 * (j * src_stride + i)]; |
src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ switch (tx_size) { |
+ case TX_32X32: |
+ scan_order = &vp9_default_scan_orders[TX_32X32]; |
+ mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
+ vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode, |
+ x->skip_encode ? src : dst, |
+ x->skip_encode ? src_stride : dst_stride, |
+ dst, dst_stride, i, j, plane); |
+ if (!x->skip_recode) { |
+ vp9_highbd_subtract_block(32, 32, src_diff, diff_stride, |
+ src, src_stride, dst, dst_stride, xd->bd); |
+ highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, |
+ p->round, p->quant, p->quant_shift, |
+ qcoeff, dqcoeff, pd->dequant, |
+ p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ } |
+ if (!x->skip_encode && *eob) { |
+ vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd); |
+ } |
+ break; |
+ case TX_16X16: |
+ tx_type = get_tx_type(pd->plane_type, xd); |
+ scan_order = &vp9_scan_orders[TX_16X16][tx_type]; |
+ mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
+ vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, |
+ x->skip_encode ? src : dst, |
+ x->skip_encode ? src_stride : dst_stride, |
+ dst, dst_stride, i, j, plane); |
+ if (!x->skip_recode) { |
+ vp9_highbd_subtract_block(16, 16, src_diff, diff_stride, |
+ src, src_stride, dst, dst_stride, xd->bd); |
+ vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); |
+ vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
+ p->quant, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ } |
+ if (!x->skip_encode && *eob) { |
+ vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, |
+ *eob, xd->bd); |
+ } |
+ break; |
+ case TX_8X8: |
+ tx_type = get_tx_type(pd->plane_type, xd); |
+ scan_order = &vp9_scan_orders[TX_8X8][tx_type]; |
+ mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
+ vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, |
+ x->skip_encode ? src : dst, |
+ x->skip_encode ? src_stride : dst_stride, |
+ dst, dst_stride, i, j, plane); |
+ if (!x->skip_recode) { |
+ vp9_highbd_subtract_block(8, 8, src_diff, diff_stride, |
+ src, src_stride, dst, dst_stride, xd->bd); |
+ vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); |
+ vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, |
+ p->quant, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ } |
+ if (!x->skip_encode && *eob) { |
+ vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob, |
+ xd->bd); |
+ } |
+ break; |
+ case TX_4X4: |
+ tx_type = get_tx_type_4x4(pd->plane_type, xd, block); |
+ scan_order = &vp9_scan_orders[TX_4X4][tx_type]; |
+ mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode; |
+ vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, |
+ x->skip_encode ? src : dst, |
+ x->skip_encode ? src_stride : dst_stride, |
+ dst, dst_stride, i, j, plane); |
+ |
+ if (!x->skip_recode) { |
+ vp9_highbd_subtract_block(4, 4, src_diff, diff_stride, |
+ src, src_stride, dst, dst_stride, xd->bd); |
+ if (tx_type != DCT_DCT) |
+ vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); |
+ else |
+ x->fwd_txm4x4(src_diff, coeff, diff_stride); |
+ vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, |
+ p->quant, p->quant_shift, qcoeff, dqcoeff, |
+ pd->dequant, p->zbin_extra, eob, |
+ scan_order->scan, scan_order->iscan); |
+ } |
+ |
+ if (!x->skip_encode && *eob) { |
+ if (tx_type == DCT_DCT) { |
+ // This is like vp9_short_idct4x4, but has a special case around |
+ // eob <= 1 that is significant (not just an optimization) for the |
+ // lossless case. |
+ x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd); |
+ } else { |
+ vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd); |
+ } |
+ } |
+ break; |
+ default: |
+ assert(0); |
+ return; |
+ } |
+ if (*eob) |
+ *(args->skip) = 0; |
+ return; |
+ } |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
switch (tx_size) { |
case TX_32X32: |
scan_order = &vp9_default_scan_orders[TX_32X32]; |