OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
| 11 |
| 12 #include "./vp9_rtcd.h" |
11 #include "./vpx_config.h" | 13 #include "./vpx_config.h" |
| 14 |
| 15 #include "vpx_mem/vpx_mem.h" |
| 16 |
| 17 #include "vp9/common/vp9_idct.h" |
| 18 #include "vp9/common/vp9_reconinter.h" |
| 19 #include "vp9/common/vp9_reconintra.h" |
| 20 #include "vp9/common/vp9_systemdependent.h" |
| 21 |
| 22 #include "vp9/encoder/vp9_dct.h" |
12 #include "vp9/encoder/vp9_encodemb.h" | 23 #include "vp9/encoder/vp9_encodemb.h" |
13 #include "vp9/common/vp9_reconinter.h" | |
14 #include "vp9/encoder/vp9_quantize.h" | 24 #include "vp9/encoder/vp9_quantize.h" |
| 25 #include "vp9/encoder/vp9_rdopt.h" |
15 #include "vp9/encoder/vp9_tokenize.h" | 26 #include "vp9/encoder/vp9_tokenize.h" |
16 #include "vp9/common/vp9_reconintra.h" | |
17 #include "vpx_mem/vpx_mem.h" | |
18 #include "vp9/encoder/vp9_rdopt.h" | |
19 #include "vp9/common/vp9_systemdependent.h" | |
20 #include "vp9_rtcd.h" | |
21 | |
22 DECLARE_ALIGNED(16, extern const uint8_t, | |
23 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); | |
24 | 27 |
25 void vp9_subtract_block_c(int rows, int cols, | 28 void vp9_subtract_block_c(int rows, int cols, |
26 int16_t *diff_ptr, ptrdiff_t diff_stride, | 29 int16_t *diff_ptr, ptrdiff_t diff_stride, |
27 const uint8_t *src_ptr, ptrdiff_t src_stride, | 30 const uint8_t *src_ptr, ptrdiff_t src_stride, |
28 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { | 31 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { |
29 int r, c; | 32 int r, c; |
30 | 33 |
31 for (r = 0; r < rows; r++) { | 34 for (r = 0; r < rows; r++) { |
32 for (c = 0; c < cols; c++) | 35 for (c = 0; c < cols; c++) |
33 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; | 36 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; |
34 | 37 |
35 diff_ptr += diff_stride; | 38 diff_ptr += diff_stride; |
36 pred_ptr += pred_stride; | 39 pred_ptr += pred_stride; |
37 src_ptr += src_stride; | 40 src_ptr += src_stride; |
38 } | 41 } |
39 } | 42 } |
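
For orientation, a minimal standalone sketch of the residual computation performed by vp9_subtract_block_c; the 4x4 buffers and sample values below are hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Hypothetical 4x4 source and flat prediction; stride == 4 here,
       * whereas in the encoder the strides come from the plane buffers. */
      const uint8_t src[16]  = { 60, 62, 61, 59,  58, 61, 63, 60,
                                 57, 59, 62, 64,  55, 58, 60, 63 };
      const uint8_t pred[16] = { 60, 60, 60, 60,  60, 60, 60, 60,
                                 60, 60, 60, 60,  60, 60, 60, 60 };
      int16_t diff[16];
      int r, c;
      /* Same double loop as vp9_subtract_block_c above. */
      for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
          diff[r * 4 + c] = src[r * 4 + c] - pred[r * 4 + c];
      for (r = 0; r < 16; r++)
        printf("%d%c", diff[r], (r % 4 == 3) ? '\n' : ' ');
      return 0;
    }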
40 | 43 |
41 static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob, | |
42 int16_t *dqcoeff, uint8_t *dest, | |
43 int stride) { | |
44 if (eob <= 1) | |
45 xd->inv_txm4x4_1_add(dqcoeff, dest, stride); | |
46 else | |
47 xd->inv_txm4x4_add(dqcoeff, dest, stride); | |
48 } | |
49 | |
50 static void inverse_transform_b_8x8_add(int eob, | |
51 int16_t *dqcoeff, uint8_t *dest, | |
52 int stride) { | |
53 if (eob <= 1) | |
54 vp9_short_idct8x8_1_add(dqcoeff, dest, stride); | |
55 else if (eob <= 10) | |
56 vp9_short_idct10_8x8_add(dqcoeff, dest, stride); | |
57 else | |
58 vp9_short_idct8x8_add(dqcoeff, dest, stride); | |
59 } | |
60 | |
61 static void inverse_transform_b_16x16_add(int eob, | |
62 int16_t *dqcoeff, uint8_t *dest, | |
63 int stride) { | |
64 if (eob <= 1) | |
65 vp9_short_idct16x16_1_add(dqcoeff, dest, stride); | |
66 else if (eob <= 10) | |
67 vp9_short_idct10_16x16_add(dqcoeff, dest, stride); | |
68 else | |
69 vp9_short_idct16x16_add(dqcoeff, dest, stride); | |
70 } | |
71 | |
72 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { | 44 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { |
73 struct macroblock_plane *const p = &x->plane[plane]; | 45 struct macroblock_plane *const p = &x->plane[plane]; |
74 const MACROBLOCKD *const xd = &x->e_mbd; | 46 const MACROBLOCKD *const xd = &x->e_mbd; |
75 const struct macroblockd_plane *const pd = &xd->plane[plane]; | 47 const struct macroblockd_plane *const pd = &xd->plane[plane]; |
76 const int bw = plane_block_width(bsize, pd); | 48 const int bw = plane_block_width(bsize, pd); |
77 const int bh = plane_block_height(bsize, pd); | 49 const int bh = plane_block_height(bsize, pd); |
78 | 50 |
79 vp9_subtract_block(bh, bw, p->src_diff, bw, | 51 vp9_subtract_block(bh, bw, p->src_diff, bw, |
80 p->src.buf, p->src.stride, | 52 p->src.buf, p->src.stride, |
81 pd->dst.buf, pd->dst.stride); | 53 pd->dst.buf, pd->dst.stride); |
82 } | 54 } |
83 | 55 |
84 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 56 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
85 subtract_plane(x, bsize, 0); | 57 subtract_plane(x, bsize, 0); |
86 } | 58 } |
87 | 59 |
88 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 60 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
89 int i; | 61 int i; |
90 | 62 |
91 for (i = 1; i < MAX_MB_PLANE; i++) | 63 for (i = 1; i < MAX_MB_PLANE; i++) |
92 subtract_plane(x, bsize, i); | 64 subtract_plane(x, bsize, i); |
93 } | 65 } |
94 | 66 |
95 void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { | 67 void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { |
96 vp9_subtract_sby(x, bsize); | 68 vp9_subtract_sby(x, bsize); |
97 vp9_subtract_sbuv(x, bsize); | 69 vp9_subtract_sbuv(x, bsize); |
98 } | 70 } |
99 | 71 |
100 | 72 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) |
101 #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) | |
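
RDTRUNC keeps the low eight bits that the companion RDCOST macro shifts away, i.e. the rounding remainder of the scaled rate term, which the trellis below can use to break ties between equal RD costs (DM and D are unused by RDTRUNC but kept for a matching signature). A self-contained sketch of the arithmetic; the RDCOST definition is paraphrased from vp9_rdopt.h and the multiplier values are hypothetical:

    #include <stdio.h>

    #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
    /* Paraphrased from vp9_rdopt.h: scaled rate plus shifted distortion. */
    #define RDCOST(RM, DM, R, D) (((128 + (R) * (RM)) >> 8) + ((D) << (DM)))

    int main(void) {
      const int rdmult = 300, rddiv = 4;  /* hypothetical multipliers */
      const int rate = 100, dist = 7;
      printf("cost = %d, truncated remainder = %d\n",
             RDCOST(rdmult, rddiv, rate, dist),
             RDTRUNC(rdmult, rddiv, rate, dist));
      return 0;
    }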
102 typedef struct vp9_token_state vp9_token_state; | 73 typedef struct vp9_token_state vp9_token_state; |
103 | 74 |
104 struct vp9_token_state { | 75 struct vp9_token_state { |
105 int rate; | 76 int rate; |
106 int error; | 77 int error; |
107 int next; | 78 int next; |
108 signed char token; | 79 signed char token; |
109 short qc; | 80 short qc; |
110 }; | 81 }; |
111 | 82 |
112 // TODO: experiments to find optimal multiple numbers | 83 // TODO(jimbankoski): experiment to find optimal RD numbers. |
113 #define Y1_RD_MULT 4 | 84 #define Y1_RD_MULT 4 |
114 #define UV_RD_MULT 2 | 85 #define UV_RD_MULT 2 |
115 | 86 |
116 static const int plane_rd_mult[4] = { | 87 static const int plane_rd_mult[4] = { |
117 Y1_RD_MULT, | 88 Y1_RD_MULT, |
118 UV_RD_MULT, | 89 UV_RD_MULT, |
119 }; | 90 }; |
120 | 91 |
121 #define UPDATE_RD_COST()\ | 92 #define UPDATE_RD_COST()\ |
122 {\ | 93 {\ |
(...skipping 17 matching lines...) |
140 token_cache[scan[idx]] = bak; | 111 token_cache[scan[idx]] = bak; |
141 return pt; | 112 return pt; |
142 } | 113 } |
143 | 114 |
144 static void optimize_b(MACROBLOCK *mb, | 115 static void optimize_b(MACROBLOCK *mb, |
145 int plane, int block, BLOCK_SIZE plane_bsize, | 116 int plane, int block, BLOCK_SIZE plane_bsize, |
146 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, | 117 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, |
147 TX_SIZE tx_size) { | 118 TX_SIZE tx_size) { |
148 MACROBLOCKD *const xd = &mb->e_mbd; | 119 MACROBLOCKD *const xd = &mb->e_mbd; |
149 struct macroblockd_plane *pd = &xd->plane[plane]; | 120 struct macroblockd_plane *pd = &xd->plane[plane]; |
150 const int ref = is_inter_block(&xd->this_mi->mbmi); | 121 const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); |
151 vp9_token_state tokens[1025][2]; | 122 vp9_token_state tokens[1025][2]; |
152 unsigned best_index[1025][2]; | 123 unsigned best_index[1025][2]; |
153 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); | 124 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); |
154 int16_t *qcoeff_ptr; | 125 int16_t *qcoeff_ptr; |
155 int16_t *dqcoeff_ptr; | 126 int16_t *dqcoeff_ptr; |
156 int eob = pd->eobs[block], final_eob, sz = 0; | 127 int eob = pd->eobs[block], final_eob, sz = 0; |
157 const int i0 = 0; | 128 const int i0 = 0; |
158 int rc, x, next, i; | 129 int rc, x, next, i; |
159 int64_t rdmult, rddiv, rd_cost0, rd_cost1; | 130 int64_t rdmult, rddiv, rd_cost0, rd_cost1; |
160 int rate0, rate1, error0, error1, t0, t1; | 131 int rate0, rate1, error0, error1, t0, t1; |
161 int best, band, pt; | 132 int best, band, pt; |
162 PLANE_TYPE type = pd->plane_type; | 133 PLANE_TYPE type = pd->plane_type; |
163 int err_mult = plane_rd_mult[type]; | 134 int err_mult = plane_rd_mult[type]; |
164 int default_eob; | 135 const int default_eob = 16 << (tx_size << 1); |
165 const int16_t *scan, *nb; | 136 const int16_t *scan, *nb; |
166 const int mul = 1 + (tx_size == TX_32X32); | 137 const int mul = 1 + (tx_size == TX_32X32); |
167 uint8_t token_cache[1024]; | 138 uint8_t token_cache[1024]; |
168 const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block); | 139 const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block); |
169 const int16_t *dequant_ptr = pd->dequant; | 140 const int16_t *dequant_ptr = pd->dequant; |
170 const uint8_t * band_translate; | 141 const uint8_t *const band_translate = get_band_translate(tx_size); |
171 | 142 |
172 assert((!type && !plane) || (type && plane)); | 143 assert((!type && !plane) || (type && plane)); |
173 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); | 144 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); |
174 qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); | 145 qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); |
175 switch (tx_size) { | 146 get_scan(xd, tx_size, type, ib, &scan, &nb); |
176 default: | |
177 case TX_4X4: | |
178 default_eob = 16; | |
179 scan = get_scan_4x4(get_tx_type_4x4(type, xd, ib)); | |
180 band_translate = vp9_coefband_trans_4x4; | |
181 break; | |
182 case TX_8X8: | |
183 scan = get_scan_8x8(get_tx_type_8x8(type, xd)); | |
184 default_eob = 64; | |
185 band_translate = vp9_coefband_trans_8x8plus; | |
186 break; | |
187 case TX_16X16: | |
188 scan = get_scan_16x16(get_tx_type_16x16(type, xd)); | |
189 default_eob = 256; | |
190 band_translate = vp9_coefband_trans_8x8plus; | |
191 break; | |
192 case TX_32X32: | |
193 scan = vp9_default_scan_32x32; | |
194 default_eob = 1024; | |
195 band_translate = vp9_coefband_trans_8x8plus; | |
196 break; | |
197 } | |
198 assert(eob <= default_eob); | 147 assert(eob <= default_eob); |
199 | 148 |
200 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ | 149 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ |
201 rdmult = mb->rdmult * err_mult; | 150 rdmult = mb->rdmult * err_mult; |
202 if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) | 151 if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) |
203 rdmult = (rdmult * 9) >> 4; | 152 rdmult = (rdmult * 9) >> 4; |
204 rddiv = mb->rddiv; | 153 rddiv = mb->rddiv; |
205 /* Initialize the sentinel node of the trellis. */ | 154 /* Initialize the sentinel node of the trellis. */ |
206 tokens[eob][0].rate = 0; | 155 tokens[eob][0].rate = 0; |
207 tokens[eob][0].error = 0; | 156 tokens[eob][0].error = 0; |
208 tokens[eob][0].next = default_eob; | 157 tokens[eob][0].next = default_eob; |
209 tokens[eob][0].token = DCT_EOB_TOKEN; | 158 tokens[eob][0].token = DCT_EOB_TOKEN; |
210 tokens[eob][0].qc = 0; | 159 tokens[eob][0].qc = 0; |
211 *(tokens[eob] + 1) = *(tokens[eob] + 0); | 160 *(tokens[eob] + 1) = *(tokens[eob] + 0); |
212 next = eob; | 161 next = eob; |
213 for (i = 0; i < eob; i++) | 162 for (i = 0; i < eob; i++) |
214 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ | 163 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ |
215 qcoeff_ptr[scan[i]]].token]; | 164 qcoeff_ptr[scan[i]]].token]; |
216 nb = vp9_get_coef_neighbors_handle(scan); | |
217 | 165 |
218 for (i = eob; i-- > i0;) { | 166 for (i = eob; i-- > i0;) { |
219 int base_bits, d2, dx; | 167 int base_bits, d2, dx; |
220 | 168 |
221 rc = scan[i]; | 169 rc = scan[i]; |
222 x = qcoeff_ptr[rc]; | 170 x = qcoeff_ptr[rc]; |
223 /* Only add a trellis state for non-zero coefficients. */ | 171 /* Only add a trellis state for non-zero coefficients. */ |
224 if (x) { | 172 if (x) { |
225 int shortcut = 0; | 173 int shortcut = 0; |
226 error0 = tokens[next][0].error; | 174 error0 = tokens[next][0].error; |
(...skipping 78 matching lines...) |
305 d2 = dx * dx; | 253 d2 = dx * dx; |
306 } | 254 } |
307 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); | 255 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); |
308 tokens[i][1].error = d2 + (best ? error1 : error0); | 256 tokens[i][1].error = d2 + (best ? error1 : error0); |
309 tokens[i][1].next = next; | 257 tokens[i][1].next = next; |
310 tokens[i][1].token = best ? t1 : t0; | 258 tokens[i][1].token = best ? t1 : t0; |
311 tokens[i][1].qc = x; | 259 tokens[i][1].qc = x; |
312 best_index[i][1] = best; | 260 best_index[i][1] = best; |
313 /* Finally, make this the new head of the trellis. */ | 261 /* Finally, make this the new head of the trellis. */ |
314 next = i; | 262 next = i; |
315 } | 263 } else { |
316 /* There's no choice to make for a zero coefficient, so we don't | 264 /* There's no choice to make for a zero coefficient, so we don't |
317 * add a new trellis node, but we do need to update the costs. | 265 * add a new trellis node, but we do need to update the costs. |
318 */ | 266 */ |
319 else { | |
320 band = get_coef_band(band_translate, i + 1); | 267 band = get_coef_band(band_translate, i + 1); |
321 t0 = tokens[next][0].token; | 268 t0 = tokens[next][0].token; |
322 t1 = tokens[next][1].token; | 269 t1 = tokens[next][1].token; |
323 /* Update the cost of each path if we're past the EOB token. */ | 270 /* Update the cost of each path if we're past the EOB token. */ |
324 if (t0 != DCT_EOB_TOKEN) { | 271 if (t0 != DCT_EOB_TOKEN) { |
325 tokens[next][0].rate += | 272 tokens[next][0].rate += |
326 mb->token_costs[tx_size][type][ref][band][1][0][t0]; | 273 mb->token_costs[tx_size][type][ref][band][1][0][t0]; |
327 tokens[next][0].token = ZERO_TOKEN; | 274 tokens[next][0].token = ZERO_TOKEN; |
328 } | 275 } |
329 if (t1 != DCT_EOB_TOKEN) { | 276 if (t1 != DCT_EOB_TOKEN) { |
(...skipping 48 matching lines...) |
378 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); | 325 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); |
379 } | 326 } |
380 | 327 |
381 static void optimize_init_b(int plane, BLOCK_SIZE bsize, | 328 static void optimize_init_b(int plane, BLOCK_SIZE bsize, |
382 struct encode_b_args *args) { | 329 struct encode_b_args *args) { |
383 const MACROBLOCKD *xd = &args->x->e_mbd; | 330 const MACROBLOCKD *xd = &args->x->e_mbd; |
384 const struct macroblockd_plane* const pd = &xd->plane[plane]; | 331 const struct macroblockd_plane* const pd = &xd->plane[plane]; |
385 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); | 332 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); |
386 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; | 333 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; |
387 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; | 334 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; |
388 const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; | 335 const MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; |
389 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size; | 336 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size; |
390 int i; | |
391 | 337 |
392 switch (tx_size) { | 338 vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane], |
393 case TX_4X4: | 339 pd->above_context, pd->left_context, |
394 vpx_memcpy(args->ctx->ta[plane], pd->above_context, | 340 num_4x4_w, num_4x4_h); |
395 sizeof(ENTROPY_CONTEXT) * num_4x4_w); | |
396 vpx_memcpy(args->ctx->tl[plane], pd->left_context, | |
397 sizeof(ENTROPY_CONTEXT) * num_4x4_h); | |
398 break; | |
399 case TX_8X8: | |
400 for (i = 0; i < num_4x4_w; i += 2) | |
401 args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i]; | |
402 for (i = 0; i < num_4x4_h; i += 2) | |
403 args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i]; | |
404 break; | |
405 case TX_16X16: | |
406 for (i = 0; i < num_4x4_w; i += 4) | |
407 args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i]; | |
408 for (i = 0; i < num_4x4_h; i += 4) | |
409 args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i]; | |
410 break; | |
411 case TX_32X32: | |
412 for (i = 0; i < num_4x4_w; i += 8) | |
413 args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i]; | |
414 for (i = 0; i < num_4x4_h; i += 8) | |
415 args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i]; | |
416 break; | |
417 default: | |
418 assert(0); | |
419 } | |
420 } | 341 } |
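
The deleted switch does not disappear: the new vp9_get_entropy_contexts is presumed to centralize the same per-tx-size context squashing. Below is a sketch reconstructed from the removed code above, with stand-in typedefs for the libvpx types; the real signature and definition should be checked in the common code:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    typedef char ENTROPY_CONTEXT;  /* stand-in, as in vp9_entropy.h */
    typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 } TX_SIZE;  /* stand-in */

    /* Reconstruction of the removed switch: collapse groups of 4x4 contexts
     * (2, 4, or 8 at a time) into one flag per transform block, using the
     * same wide-load type punning as the original. */
    void get_entropy_contexts_sketch(TX_SIZE tx_size,
                                     ENTROPY_CONTEXT *t_above,
                                     ENTROPY_CONTEXT *t_left,
                                     const ENTROPY_CONTEXT *above,
                                     const ENTROPY_CONTEXT *left,
                                     int num_4x4_w, int num_4x4_h) {
      int i;
      switch (tx_size) {
        case TX_4X4:
          memcpy(t_above, above, sizeof(*t_above) * num_4x4_w);
          memcpy(t_left, left, sizeof(*t_left) * num_4x4_h);
          break;
        case TX_8X8:
          for (i = 0; i < num_4x4_w; i += 2)
            t_above[i] = !!*(const uint16_t *)&above[i];
          for (i = 0; i < num_4x4_h; i += 2)
            t_left[i] = !!*(const uint16_t *)&left[i];
          break;
        case TX_16X16:
          for (i = 0; i < num_4x4_w; i += 4)
            t_above[i] = !!*(const uint32_t *)&above[i];
          for (i = 0; i < num_4x4_h; i += 4)
            t_left[i] = !!*(const uint32_t *)&left[i];
          break;
        case TX_32X32:
          for (i = 0; i < num_4x4_w; i += 8)
            t_above[i] = !!*(const uint64_t *)&above[i];
          for (i = 0; i < num_4x4_h; i += 8)
            t_left[i] = !!*(const uint64_t *)&left[i];
          break;
        default:
          assert(0);
      }
    }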
421 | 342 |
422 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, | 343 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, |
423 TX_SIZE tx_size, void *arg) { | 344 TX_SIZE tx_size, void *arg) { |
424 struct encode_b_args* const args = arg; | 345 struct encode_b_args* const args = arg; |
425 MACROBLOCK* const x = args->x; | 346 MACROBLOCK* const x = args->x; |
426 MACROBLOCKD* const xd = &x->e_mbd; | 347 MACROBLOCKD* const xd = &x->e_mbd; |
427 struct macroblock_plane *const p = &x->plane[plane]; | 348 struct macroblock_plane *const p = &x->plane[plane]; |
428 struct macroblockd_plane *const pd = &xd->plane[plane]; | 349 struct macroblockd_plane *const pd = &xd->plane[plane]; |
429 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 350 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
430 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 351 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); |
431 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 352 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
432 const int16_t *scan, *iscan; | 353 const int16_t *scan, *iscan; |
433 uint16_t *eob = &pd->eobs[block]; | 354 uint16_t *eob = &pd->eobs[block]; |
434 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 355 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; |
435 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 356 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; |
436 int xoff, yoff; | 357 int xoff, yoff; |
437 int16_t *src_diff; | 358 int16_t *src_diff; |
438 | 359 |
439 switch (tx_size) { | 360 switch (tx_size) { |
440 case TX_32X32: | 361 case TX_32X32: |
441 scan = vp9_default_scan_32x32; | 362 scan = vp9_default_scan_32x32; |
442 iscan = vp9_default_iscan_32x32; | 363 iscan = vp9_default_iscan_32x32; |
443 block >>= 6; | 364 block >>= 6; |
444 xoff = 32 * (block & twmask); | 365 xoff = 32 * (block & twmask); |
445 yoff = 32 * (block >> twl); | 366 yoff = 32 * (block >> twl); |
446 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 367 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
447 if (x->use_lp32x32fdct) | 368 if (x->use_lp32x32fdct) |
448 vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8); | 369 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); |
449 else | 370 else |
450 vp9_short_fdct32x32(src_diff, coeff, bw * 8); | 371 vp9_fdct32x32(src_diff, coeff, bw * 4); |
451 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 372 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
452 p->quant, p->quant_shift, qcoeff, dqcoeff, | 373 p->quant, p->quant_shift, qcoeff, dqcoeff, |
453 pd->dequant, p->zbin_extra, eob, scan, iscan); | 374 pd->dequant, p->zbin_extra, eob, scan, iscan); |
454 break; | 375 break; |
455 case TX_16X16: | 376 case TX_16X16: |
456 scan = vp9_default_scan_16x16; | 377 scan = vp9_default_scan_16x16; |
457 iscan = vp9_default_iscan_16x16; | 378 iscan = vp9_default_iscan_16x16; |
458 block >>= 4; | 379 block >>= 4; |
459 xoff = 16 * (block & twmask); | 380 xoff = 16 * (block & twmask); |
460 yoff = 16 * (block >> twl); | 381 yoff = 16 * (block >> twl); |
461 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 382 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
462 x->fwd_txm16x16(src_diff, coeff, bw * 8); | 383 vp9_fdct16x16(src_diff, coeff, bw * 4); |
463 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 384 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
464 p->quant, p->quant_shift, qcoeff, dqcoeff, | 385 p->quant, p->quant_shift, qcoeff, dqcoeff, |
465 pd->dequant, p->zbin_extra, eob, scan, iscan); | 386 pd->dequant, p->zbin_extra, eob, scan, iscan); |
466 break; | 387 break; |
467 case TX_8X8: | 388 case TX_8X8: |
468 scan = vp9_default_scan_8x8; | 389 scan = vp9_default_scan_8x8; |
469 iscan = vp9_default_iscan_8x8; | 390 iscan = vp9_default_iscan_8x8; |
470 block >>= 2; | 391 block >>= 2; |
471 xoff = 8 * (block & twmask); | 392 xoff = 8 * (block & twmask); |
472 yoff = 8 * (block >> twl); | 393 yoff = 8 * (block >> twl); |
473 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 394 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
474 x->fwd_txm8x8(src_diff, coeff, bw * 8); | 395 vp9_fdct8x8(src_diff, coeff, bw * 4); |
475 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, | 396 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, |
476 p->quant, p->quant_shift, qcoeff, dqcoeff, | 397 p->quant, p->quant_shift, qcoeff, dqcoeff, |
477 pd->dequant, p->zbin_extra, eob, scan, iscan); | 398 pd->dequant, p->zbin_extra, eob, scan, iscan); |
478 break; | 399 break; |
479 case TX_4X4: | 400 case TX_4X4: |
480 scan = vp9_default_scan_4x4; | 401 scan = vp9_default_scan_4x4; |
481 iscan = vp9_default_iscan_4x4; | 402 iscan = vp9_default_iscan_4x4; |
482 xoff = 4 * (block & twmask); | 403 xoff = 4 * (block & twmask); |
483 yoff = 4 * (block >> twl); | 404 yoff = 4 * (block >> twl); |
484 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 405 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
485 x->fwd_txm4x4(src_diff, coeff, bw * 8); | 406 x->fwd_txm4x4(src_diff, coeff, bw * 4); |
486 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, | 407 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, |
487 p->quant, p->quant_shift, qcoeff, dqcoeff, | 408 p->quant, p->quant_shift, qcoeff, dqcoeff, |
488 pd->dequant, p->zbin_extra, eob, scan, iscan); | 409 pd->dequant, p->zbin_extra, eob, scan, iscan); |
489 break; | 410 break; |
490 default: | 411 default: |
491 assert(0); | 412 assert(0); |
492 } | 413 } |
493 } | 414 } |
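
The xoff/yoff arithmetic above converts a raster block index (counted in 4x4 units) into pixel offsets within the plane. A standalone worked example for the TX_16X16 branch; the plane size and block index are hypothetical:

    #include <stdio.h>

    int main(void) {
      /* Hypothetical 64x64 luma plane: bwl = 4, so the plane is 16 4x4
       * blocks (64 pixels) wide; TX_16X16 has enum value 2. */
      const int bwl = 4, bw = 1 << bwl;
      const int tx_size = 2;
      const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
      int block = 48;                    /* block index in 4x4 units */
      int xoff, yoff;
      block >>= 4;                       /* rescale to 16x16 units, as above */
      xoff = 16 * (block & twmask);      /* x offset in pixels */
      yoff = 16 * (block >> twl);        /* y offset in pixels */
      printf("16x16 block %d -> x=%d, y=%d, src_diff offset=%d\n",
             block, xoff, yoff, 4 * bw * yoff + xoff);
      return 0;
    }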
494 | 415 |
495 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, | 416 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, |
496 TX_SIZE tx_size, void *arg) { | 417 TX_SIZE tx_size, void *arg) { |
497 struct encode_b_args *const args = arg; | 418 struct encode_b_args *const args = arg; |
498 MACROBLOCK *const x = args->x; | 419 MACROBLOCK *const x = args->x; |
499 MACROBLOCKD *const xd = &x->e_mbd; | 420 MACROBLOCKD *const xd = &x->e_mbd; |
| 421 struct optimize_ctx *const ctx = args->ctx; |
500 struct macroblockd_plane *const pd = &xd->plane[plane]; | 422 struct macroblockd_plane *const pd = &xd->plane[plane]; |
501 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, | 423 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, |
502 block); | 424 block); |
503 | 425 |
504 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 426 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
505 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, | 427 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, |
506 pd->dst.buf, pd->dst.stride); | 428 pd->dst.buf, pd->dst.stride); |
| 429 |
| 430 // TODO(jingning): per-transformed-block zero forcing is enabled only |
| 431 // for the luma component; chroma components will be integrated later. |
| 432 if (x->zcoeff_blk[tx_size][block] && plane == 0) { |
| 433 int x, y; |
| 434 pd->eobs[block] = 0; |
| 435 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); |
| 436 ctx->ta[plane][x] = 0; |
| 437 ctx->tl[plane][y] = 0; |
| 438 return; |
| 439 } |
| 440 |
507 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); | 441 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
508 | 442 |
509 if (x->optimize) | 443 if (x->optimize) |
510 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); | 444 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); |
511 | 445 |
512 if (x->skip_encode || pd->eobs[block] == 0) | 446 if (x->skip_encode || pd->eobs[block] == 0) |
513 return; | 447 return; |
514 | 448 |
515 switch (tx_size) { | 449 switch (tx_size) { |
516 case TX_32X32: | 450 case TX_32X32: |
517 vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); | 451 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
518 break; | 452 break; |
519 case TX_16X16: | 453 case TX_16X16: |
520 inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst, | 454 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
521 pd->dst.stride); | |
522 break; | 455 break; |
523 case TX_8X8: | 456 case TX_8X8: |
524 inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst, | 457 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
525 pd->dst.stride); | |
526 break; | 458 break; |
527 case TX_4X4: | 459 case TX_4X4: |
528 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 460 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
529 // which is significant (not just an optimization) for the lossless | 461 // which is significant (not just an optimization) for the lossless |
530 // case. | 462 // case. |
531 inverse_transform_b_4x4_add(xd, pd->eobs[block], dqcoeff, | 463 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
532 dst, pd->dst.stride); | |
533 break; | 464 break; |
534 default: | 465 default: |
535 assert(!"Invalid transform size"); | 466 assert(!"Invalid transform size"); |
536 } | 467 } |
537 } | 468 } |
538 | 469 |
| 470 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, |
| 471 TX_SIZE tx_size, void *arg) { |
| 472 struct encode_b_args *const args = arg; |
| 473 MACROBLOCK *const x = args->x; |
| 474 MACROBLOCKD *const xd = &x->e_mbd; |
| 475 struct macroblockd_plane *const pd = &xd->plane[plane]; |
| 476 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, |
| 477 block); |
| 478 |
| 479 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| 480 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, |
| 481 pd->dst.buf, pd->dst.stride); |
| 482 |
| 483 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
| 484 |
| 485 if (pd->eobs[block] == 0) |
| 486 return; |
| 487 |
| 488 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); |
| 489 } |
| 490 |
539 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 491 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
540 MACROBLOCKD *const xd = &x->e_mbd; | 492 MACROBLOCKD *const xd = &x->e_mbd; |
541 struct optimize_ctx ctx; | 493 struct optimize_ctx ctx; |
542 struct encode_b_args arg = {x, &ctx}; | 494 struct encode_b_args arg = {x, &ctx}; |
543 | 495 |
544 vp9_subtract_sby(x, bsize); | 496 vp9_subtract_sby(x, bsize); |
545 if (x->optimize) | 497 if (x->optimize) |
546 optimize_init_b(0, bsize, &arg); | 498 optimize_init_b(0, bsize, &arg); |
547 | 499 |
548 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg); | 500 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg); |
549 } | 501 } |
550 | 502 |
551 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { | 503 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { |
552 MACROBLOCKD *const xd = &x->e_mbd; | 504 MACROBLOCKD *const xd = &x->e_mbd; |
553 struct optimize_ctx ctx; | 505 struct optimize_ctx ctx; |
554 struct encode_b_args arg = {x, &ctx}; | 506 struct encode_b_args arg = {x, &ctx}; |
555 | 507 |
556 vp9_subtract_sb(x, bsize); | 508 vp9_subtract_sb(x, bsize); |
557 | 509 |
558 if (x->optimize) { | 510 if (x->optimize) { |
559 int i; | 511 int i; |
560 for (i = 0; i < MAX_MB_PLANE; ++i) | 512 for (i = 0; i < MAX_MB_PLANE; ++i) |
561 optimize_init_b(i, bsize, &arg); | 513 optimize_init_b(i, bsize, &arg); |
562 } | 514 } |
563 | 515 |
564 foreach_transformed_block(xd, bsize, encode_block, &arg); | 516 foreach_transformed_block(xd, bsize, encode_block, &arg); |
565 } | 517 } |
566 | 518 |
567 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, | 519 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, |
568 TX_SIZE tx_size, void *arg) { | 520 TX_SIZE tx_size, void *arg) { |
569 struct encode_b_args* const args = arg; | 521 struct encode_b_args* const args = arg; |
570 MACROBLOCK *const x = args->x; | 522 MACROBLOCK *const x = args->x; |
571 MACROBLOCKD *const xd = &x->e_mbd; | 523 MACROBLOCKD *const xd = &x->e_mbd; |
572 MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; | 524 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; |
573 struct macroblock_plane *const p = &x->plane[plane]; | 525 struct macroblock_plane *const p = &x->plane[plane]; |
574 struct macroblockd_plane *const pd = &xd->plane[plane]; | 526 struct macroblockd_plane *const pd = &xd->plane[plane]; |
575 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 527 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
576 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 528 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); |
577 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 529 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
578 const int16_t *scan, *iscan; | 530 const int16_t *scan, *iscan; |
579 TX_TYPE tx_type; | 531 TX_TYPE tx_type; |
580 MB_PREDICTION_MODE mode; | 532 MB_PREDICTION_MODE mode; |
581 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 533 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; |
582 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 534 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; |
(...skipping 17 matching lines...) |
600 xoff = 32 * (block & twmask); | 552 xoff = 32 * (block & twmask); |
601 yoff = 32 * (block >> twl); | 553 yoff = 32 * (block >> twl); |
602 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 554 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
603 src = p->src.buf + yoff * p->src.stride + xoff; | 555 src = p->src.buf + yoff * p->src.stride + xoff; |
604 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 556 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
605 vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, | 557 vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, |
606 dst, pd->dst.stride, dst, pd->dst.stride); | 558 dst, pd->dst.stride, dst, pd->dst.stride); |
607 vp9_subtract_block(32, 32, src_diff, bw * 4, | 559 vp9_subtract_block(32, 32, src_diff, bw * 4, |
608 src, p->src.stride, dst, pd->dst.stride); | 560 src, p->src.stride, dst, pd->dst.stride); |
609 if (x->use_lp32x32fdct) | 561 if (x->use_lp32x32fdct) |
610 vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8); | 562 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); |
611 else | 563 else |
612 vp9_short_fdct32x32(src_diff, coeff, bw * 8); | 564 vp9_fdct32x32(src_diff, coeff, bw * 4); |
613 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 565 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
614 p->quant, p->quant_shift, qcoeff, dqcoeff, | 566 p->quant, p->quant_shift, qcoeff, dqcoeff, |
615 pd->dequant, p->zbin_extra, eob, scan, iscan); | 567 pd->dequant, p->zbin_extra, eob, scan, iscan); |
616 if (!x->skip_encode && *eob) | 568 if (!x->skip_encode && *eob) |
617 vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride); | 569 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); |
618 break; | 570 break; |
619 case TX_16X16: | 571 case TX_16X16: |
620 tx_type = get_tx_type_16x16(pd->plane_type, xd); | 572 tx_type = get_tx_type_16x16(pd->plane_type, xd); |
621 scan = get_scan_16x16(tx_type); | 573 scan = get_scan_16x16(tx_type); |
622 iscan = get_iscan_16x16(tx_type); | 574 iscan = get_iscan_16x16(tx_type); |
623 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 575 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
624 block >>= 4; | 576 block >>= 4; |
625 xoff = 16 * (block & twmask); | 577 xoff = 16 * (block & twmask); |
626 yoff = 16 * (block >> twl); | 578 yoff = 16 * (block >> twl); |
627 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 579 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
628 src = p->src.buf + yoff * p->src.stride + xoff; | 580 src = p->src.buf + yoff * p->src.stride + xoff; |
629 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 581 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
630 vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, | 582 vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, |
631 dst, pd->dst.stride, dst, pd->dst.stride); | 583 dst, pd->dst.stride, dst, pd->dst.stride); |
632 vp9_subtract_block(16, 16, src_diff, bw * 4, | 584 vp9_subtract_block(16, 16, src_diff, bw * 4, |
633 src, p->src.stride, dst, pd->dst.stride); | 585 src, p->src.stride, dst, pd->dst.stride); |
634 if (tx_type != DCT_DCT) | 586 vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); |
635 vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type); | |
636 else | |
637 x->fwd_txm16x16(src_diff, coeff, bw * 8); | |
638 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 587 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
639 p->quant, p->quant_shift, qcoeff, dqcoeff, | 588 p->quant, p->quant_shift, qcoeff, dqcoeff, |
640 pd->dequant, p->zbin_extra, eob, scan, iscan); | 589 pd->dequant, p->zbin_extra, eob, scan, iscan); |
641 if (!x->skip_encode && *eob) { | 590 if (!x->skip_encode && *eob) |
642 if (tx_type == DCT_DCT) | 591 vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
643 inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride); | |
644 else | |
645 vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type); | |
646 } | |
647 break; | 592 break; |
648 case TX_8X8: | 593 case TX_8X8: |
649 tx_type = get_tx_type_8x8(pd->plane_type, xd); | 594 tx_type = get_tx_type_8x8(pd->plane_type, xd); |
650 scan = get_scan_8x8(tx_type); | 595 scan = get_scan_8x8(tx_type); |
651 iscan = get_iscan_8x8(tx_type); | 596 iscan = get_iscan_8x8(tx_type); |
652 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 597 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
653 block >>= 2; | 598 block >>= 2; |
654 xoff = 8 * (block & twmask); | 599 xoff = 8 * (block & twmask); |
655 yoff = 8 * (block >> twl); | 600 yoff = 8 * (block >> twl); |
656 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 601 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
657 src = p->src.buf + yoff * p->src.stride + xoff; | 602 src = p->src.buf + yoff * p->src.stride + xoff; |
658 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 603 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
659 vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, | 604 vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, |
660 dst, pd->dst.stride, dst, pd->dst.stride); | 605 dst, pd->dst.stride, dst, pd->dst.stride); |
661 vp9_subtract_block(8, 8, src_diff, bw * 4, | 606 vp9_subtract_block(8, 8, src_diff, bw * 4, |
662 src, p->src.stride, dst, pd->dst.stride); | 607 src, p->src.stride, dst, pd->dst.stride); |
663 if (tx_type != DCT_DCT) | 608 vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); |
664 vp9_short_fht8x8(src_diff, coeff, bw * 4, tx_type); | |
665 else | |
666 x->fwd_txm8x8(src_diff, coeff, bw * 8); | |
667 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, | 609 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, |
668 p->quant_shift, qcoeff, dqcoeff, | 610 p->quant_shift, qcoeff, dqcoeff, |
669 pd->dequant, p->zbin_extra, eob, scan, iscan); | 611 pd->dequant, p->zbin_extra, eob, scan, iscan); |
670 if (!x->skip_encode && *eob) { | 612 if (!x->skip_encode && *eob) |
671 if (tx_type == DCT_DCT) | 613 vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
672 inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride); | |
673 else | |
674 vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type); | |
675 } | |
676 break; | 614 break; |
677 case TX_4X4: | 615 case TX_4X4: |
678 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); | 616 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); |
679 scan = get_scan_4x4(tx_type); | 617 scan = get_scan_4x4(tx_type); |
680 iscan = get_iscan_4x4(tx_type); | 618 iscan = get_iscan_4x4(tx_type); |
681 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) | 619 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) |
682 mode = xd->this_mi->bmi[block].as_mode; | 620 mode = xd->mi_8x8[0]->bmi[block].as_mode; |
683 else | 621 else |
684 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 622 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
685 | 623 |
686 xoff = 4 * (block & twmask); | 624 xoff = 4 * (block & twmask); |
687 yoff = 4 * (block >> twl); | 625 yoff = 4 * (block >> twl); |
688 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 626 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; |
689 src = p->src.buf + yoff * p->src.stride + xoff; | 627 src = p->src.buf + yoff * p->src.stride + xoff; |
690 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 628 src_diff = p->src_diff + 4 * bw * yoff + xoff; |
691 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, | 629 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, |
692 dst, pd->dst.stride, dst, pd->dst.stride); | 630 dst, pd->dst.stride, dst, pd->dst.stride); |
693 vp9_subtract_block(4, 4, src_diff, bw * 4, | 631 vp9_subtract_block(4, 4, src_diff, bw * 4, |
694 src, p->src.stride, dst, pd->dst.stride); | 632 src, p->src.stride, dst, pd->dst.stride); |
695 if (tx_type != DCT_DCT) | 633 if (tx_type != DCT_DCT) |
696 vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); | 634 vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); |
697 else | 635 else |
698 x->fwd_txm4x4(src_diff, coeff, bw * 8); | 636 x->fwd_txm4x4(src_diff, coeff, bw * 4); |
699 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, | 637 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, |
700 p->quant_shift, qcoeff, dqcoeff, | 638 p->quant_shift, qcoeff, dqcoeff, |
701 pd->dequant, p->zbin_extra, eob, scan, iscan); | 639 pd->dequant, p->zbin_extra, eob, scan, iscan); |
702 if (!x->skip_encode && *eob) { | 640 if (!x->skip_encode && *eob) { |
703 if (tx_type == DCT_DCT) | 641 if (tx_type == DCT_DCT) |
704 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 642 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
705 // which is significant (not just an optimization) for the lossless | 643 // which is significant (not just an optimization) for the lossless |
706 // case. | 644 // case. |
707 inverse_transform_b_4x4_add(xd, *eob, dqcoeff, dst, pd->dst.stride); | 645 xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); |
708 else | 646 else |
709 vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type); | 647 vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); |
710 } | 648 } |
711 break; | 649 break; |
712 default: | 650 default: |
713 assert(0); | 651 assert(0); |
714 } | 652 } |
715 } | 653 } |
716 | 654 |
717 void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) { | 655 void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) { |
718 MACROBLOCKD* const xd = &x->e_mbd; | 656 MACROBLOCKD* const xd = &x->e_mbd; |
719 struct optimize_ctx ctx; | 657 struct optimize_ctx ctx; |
720 struct encode_b_args arg = {x, &ctx}; | 658 struct encode_b_args arg = {x, &ctx}; |
721 | 659 |
722 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, | 660 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, |
723 &arg); | 661 &arg); |
724 } | 662 } |
725 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 663 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
726 MACROBLOCKD* const xd = &x->e_mbd; | 664 MACROBLOCKD* const xd = &x->e_mbd; |
727 struct optimize_ctx ctx; | 665 struct optimize_ctx ctx; |
728 struct encode_b_args arg = {x, &ctx}; | 666 struct encode_b_args arg = {x, &ctx}; |
729 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); | 667 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); |
730 } | 668 } |
731 | 669 |