OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 | 11 |
12 #include "./vp9_rtcd.h" | 12 #include "./vp9_rtcd.h" |
13 #include "./vpx_config.h" | 13 #include "./vpx_config.h" |
14 | 14 |
15 #include "vpx_mem/vpx_mem.h" | 15 #include "vpx_mem/vpx_mem.h" |
16 | 16 |
17 #include "vp9/common/vp9_idct.h" | 17 #include "vp9/common/vp9_idct.h" |
18 #include "vp9/common/vp9_reconinter.h" | 18 #include "vp9/common/vp9_reconinter.h" |
19 #include "vp9/common/vp9_reconintra.h" | 19 #include "vp9/common/vp9_reconintra.h" |
20 #include "vp9/common/vp9_systemdependent.h" | 20 #include "vp9/common/vp9_systemdependent.h" |
21 | 21 |
22 #include "vp9/encoder/vp9_dct.h" | 22 #include "vp9/encoder/vp9_dct.h" |
23 #include "vp9/encoder/vp9_encodemb.h" | 23 #include "vp9/encoder/vp9_encodemb.h" |
24 #include "vp9/encoder/vp9_quantize.h" | 24 #include "vp9/encoder/vp9_quantize.h" |
25 #include "vp9/encoder/vp9_rdopt.h" | 25 #include "vp9/encoder/vp9_rdopt.h" |
26 #include "vp9/encoder/vp9_tokenize.h" | 26 #include "vp9/encoder/vp9_tokenize.h" |
27 | 27 |
| 28 void vp9_setup_interp_filters(MACROBLOCKD *xd, |
| 29 INTERPOLATION_TYPE mcomp_filter_type, |
| 30 VP9_COMMON *cm) { |
| 31 if (xd->mi_8x8 && xd->mi_8x8[0]) { |
| 32 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; |
| 33 |
| 34 set_scale_factors(xd, mbmi->ref_frame[0] - LAST_FRAME, |
| 35 mbmi->ref_frame[1] - LAST_FRAME, |
| 36 cm->active_ref_scale); |
| 37 } else { |
| 38 set_scale_factors(xd, -1, -1, cm->active_ref_scale); |
| 39 } |
| 40 |
| 41 xd->subpix.filter_x = xd->subpix.filter_y = |
| 42 vp9_get_filter_kernel(mcomp_filter_type == SWITCHABLE ? |
| 43 EIGHTTAP : mcomp_filter_type); |
| 44 |
| 45 assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0); |
| 46 } |
| 47 |
28 void vp9_subtract_block_c(int rows, int cols, | 48 void vp9_subtract_block_c(int rows, int cols, |
29 int16_t *diff_ptr, ptrdiff_t diff_stride, | 49 int16_t *diff_ptr, ptrdiff_t diff_stride, |
30 const uint8_t *src_ptr, ptrdiff_t src_stride, | 50 const uint8_t *src_ptr, ptrdiff_t src_stride, |
31 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { | 51 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { |
32 int r, c; | 52 int r, c; |
33 | 53 |
34 for (r = 0; r < rows; r++) { | 54 for (r = 0; r < rows; r++) { |
35 for (c = 0; c < cols; c++) | 55 for (c = 0; c < cols; c++) |
36 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; | 56 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; |
37 | 57 |
38 diff_ptr += diff_stride; | 58 diff_ptr += diff_stride; |
39 pred_ptr += pred_stride; | 59 pred_ptr += pred_stride; |
40 src_ptr += src_stride; | 60 src_ptr += src_stride; |
41 } | 61 } |
42 } | 62 } |
43 | 63 |
44 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { | 64 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { |
45 struct macroblock_plane *const p = &x->plane[plane]; | 65 struct macroblock_plane *const p = &x->plane[plane]; |
46 const MACROBLOCKD *const xd = &x->e_mbd; | 66 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; |
47 const struct macroblockd_plane *const pd = &xd->plane[plane]; | 67 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); |
48 const int bw = plane_block_width(bsize, pd); | 68 const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
49 const int bh = plane_block_height(bsize, pd); | 69 const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; |
50 | 70 |
51 vp9_subtract_block(bh, bw, p->src_diff, bw, | 71 vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, |
52 p->src.buf, p->src.stride, | |
53 pd->dst.buf, pd->dst.stride); | 72 pd->dst.buf, pd->dst.stride); |
54 } | 73 } |
55 | 74 |
56 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 75 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
57 subtract_plane(x, bsize, 0); | 76 subtract_plane(x, bsize, 0); |
58 } | 77 } |
59 | 78 |
60 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 79 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
61 int i; | 80 int i; |
62 | 81 |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
110 pt = get_coef_context(nb, token_cache, idx + 1); | 129 pt = get_coef_context(nb, token_cache, idx + 1); |
111 token_cache[scan[idx]] = bak; | 130 token_cache[scan[idx]] = bak; |
112 return pt; | 131 return pt; |
113 } | 132 } |
114 | 133 |
115 static void optimize_b(MACROBLOCK *mb, | 134 static void optimize_b(MACROBLOCK *mb, |
116 int plane, int block, BLOCK_SIZE plane_bsize, | 135 int plane, int block, BLOCK_SIZE plane_bsize, |
117 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, | 136 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, |
118 TX_SIZE tx_size) { | 137 TX_SIZE tx_size) { |
119 MACROBLOCKD *const xd = &mb->e_mbd; | 138 MACROBLOCKD *const xd = &mb->e_mbd; |
| 139 struct macroblock_plane *p = &mb->plane[plane]; |
120 struct macroblockd_plane *pd = &xd->plane[plane]; | 140 struct macroblockd_plane *pd = &xd->plane[plane]; |
121 const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); | 141 const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); |
122 vp9_token_state tokens[1025][2]; | 142 vp9_token_state tokens[1025][2]; |
123 unsigned best_index[1025][2]; | 143 unsigned best_index[1025][2]; |
124 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); | 144 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); |
125 int16_t *qcoeff_ptr; | 145 int16_t *qcoeff_ptr; |
126 int16_t *dqcoeff_ptr; | 146 int16_t *dqcoeff_ptr; |
127 int eob = pd->eobs[block], final_eob, sz = 0; | 147 int eob = p->eobs[block], final_eob, sz = 0; |
128 const int i0 = 0; | 148 const int i0 = 0; |
129 int rc, x, next, i; | 149 int rc, x, next, i; |
130 int64_t rdmult, rddiv, rd_cost0, rd_cost1; | 150 int64_t rdmult, rddiv, rd_cost0, rd_cost1; |
131 int rate0, rate1, error0, error1, t0, t1; | 151 int rate0, rate1, error0, error1, t0, t1; |
132 int best, band, pt; | 152 int best, band, pt; |
133 PLANE_TYPE type = pd->plane_type; | 153 PLANE_TYPE type = pd->plane_type; |
134 int err_mult = plane_rd_mult[type]; | 154 int err_mult = plane_rd_mult[type]; |
135 const int default_eob = 16 << (tx_size << 1); | 155 const int default_eob = 16 << (tx_size << 1); |
136 const int16_t *scan, *nb; | 156 |
137 const int mul = 1 + (tx_size == TX_32X32); | 157 const int mul = 1 + (tx_size == TX_32X32); |
138 uint8_t token_cache[1024]; | 158 uint8_t token_cache[1024]; |
139 const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block); | |
140 const int16_t *dequant_ptr = pd->dequant; | 159 const int16_t *dequant_ptr = pd->dequant; |
141 const uint8_t *const band_translate = get_band_translate(tx_size); | 160 const uint8_t *const band_translate = get_band_translate(tx_size); |
| 161 const scan_order *so = get_scan(xd, tx_size, type, block); |
| 162 const int16_t *scan = so->scan; |
| 163 const int16_t *nb = so->neighbors; |
142 | 164 |
143 assert((!type && !plane) || (type && plane)); | 165 assert((!type && !plane) || (type && plane)); |
144 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); | 166 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); |
145 qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); | 167 qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block); |
146 get_scan(xd, tx_size, type, ib, &scan, &nb); | |
147 assert(eob <= default_eob); | 168 assert(eob <= default_eob); |
148 | 169 |
149 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ | 170 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ |
150 rdmult = mb->rdmult * err_mult; | 171 rdmult = mb->rdmult * err_mult; |
151 if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) | 172 if (!is_inter_block(&mb->e_mbd.mi_8x8[0]->mbmi)) |
152 rdmult = (rdmult * 9) >> 4; | 173 rdmult = (rdmult * 9) >> 4; |
153 rddiv = mb->rddiv; | 174 rddiv = mb->rddiv; |
154 /* Initialize the sentinel node of the trellis. */ | 175 /* Initialize the sentinel node of the trellis. */ |
155 tokens[eob][0].rate = 0; | 176 tokens[eob][0].rate = 0; |
156 tokens[eob][0].error = 0; | 177 tokens[eob][0].error = 0; |
157 tokens[eob][0].next = default_eob; | 178 tokens[eob][0].next = default_eob; |
158 tokens[eob][0].token = DCT_EOB_TOKEN; | 179 tokens[eob][0].token = EOB_TOKEN; |
159 tokens[eob][0].qc = 0; | 180 tokens[eob][0].qc = 0; |
160 *(tokens[eob] + 1) = *(tokens[eob] + 0); | 181 *(tokens[eob] + 1) = *(tokens[eob] + 0); |
161 next = eob; | 182 next = eob; |
162 for (i = 0; i < eob; i++) | 183 for (i = 0; i < eob; i++) |
163 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ | 184 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ |
164 qcoeff_ptr[scan[i]]].token]; | 185 qcoeff_ptr[scan[i]]].token]; |
165 | 186 |
166 for (i = eob; i-- > i0;) { | 187 for (i = eob; i-- > i0;) { |
167 int base_bits, d2, dx; | 188 int base_bits, d2, dx; |
168 | 189 |
169 rc = scan[i]; | 190 rc = scan[i]; |
170 x = qcoeff_ptr[rc]; | 191 x = qcoeff_ptr[rc]; |
171 /* Only add a trellis state for non-zero coefficients. */ | 192 /* Only add a trellis state for non-zero coefficients. */ |
172 if (x) { | 193 if (x) { |
173 int shortcut = 0; | 194 int shortcut = 0; |
174 error0 = tokens[next][0].error; | 195 error0 = tokens[next][0].error; |
175 error1 = tokens[next][1].error; | 196 error1 = tokens[next][1].error; |
176 /* Evaluate the first possibility for this state. */ | 197 /* Evaluate the first possibility for this state. */ |
177 rate0 = tokens[next][0].rate; | 198 rate0 = tokens[next][0].rate; |
178 rate1 = tokens[next][1].rate; | 199 rate1 = tokens[next][1].rate; |
179 t0 = (vp9_dct_value_tokens_ptr + x)->token; | 200 t0 = (vp9_dct_value_tokens_ptr + x)->token; |
180 /* Consider both possible successor states. */ | 201 /* Consider both possible successor states. */ |
181 if (next < default_eob) { | 202 if (next < default_eob) { |
182 band = get_coef_band(band_translate, i + 1); | 203 band = band_translate[i + 1]; |
183 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); | 204 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); |
184 rate0 += | 205 rate0 += |
185 mb->token_costs[tx_size][type][ref][band][0][pt] | 206 mb->token_costs[tx_size][type][ref][band][0][pt] |
186 [tokens[next][0].token]; | 207 [tokens[next][0].token]; |
187 rate1 += | 208 rate1 += |
188 mb->token_costs[tx_size][type][ref][band][0][pt] | 209 mb->token_costs[tx_size][type][ref][band][0][pt] |
189 [tokens[next][1].token]; | 210 [tokens[next][1].token]; |
190 } | 211 } |
191 UPDATE_RD_COST(); | 212 UPDATE_RD_COST(); |
192 /* And pick the best. */ | 213 /* And pick the best. */ |
(...skipping 22 matching lines...) Expand all Loading... |
215 if (shortcut) { | 236 if (shortcut) { |
216 sz = -(x < 0); | 237 sz = -(x < 0); |
217 x -= 2 * sz + 1; | 238 x -= 2 * sz + 1; |
218 } | 239 } |
219 | 240 |
220 /* Consider both possible successor states. */ | 241 /* Consider both possible successor states. */ |
221 if (!x) { | 242 if (!x) { |
222 /* If we reduced this coefficient to zero, check to see if | 243 /* If we reduced this coefficient to zero, check to see if |
223 * we need to move the EOB back here. | 244 * we need to move the EOB back here. |
224 */ | 245 */ |
225 t0 = tokens[next][0].token == DCT_EOB_TOKEN ? | 246 t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; |
226 DCT_EOB_TOKEN : ZERO_TOKEN; | 247 t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; |
227 t1 = tokens[next][1].token == DCT_EOB_TOKEN ? | |
228 DCT_EOB_TOKEN : ZERO_TOKEN; | |
229 } else { | 248 } else { |
230 t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; | 249 t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; |
231 } | 250 } |
232 if (next < default_eob) { | 251 if (next < default_eob) { |
233 band = get_coef_band(band_translate, i + 1); | 252 band = band_translate[i + 1]; |
234 if (t0 != DCT_EOB_TOKEN) { | 253 if (t0 != EOB_TOKEN) { |
235 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); | 254 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); |
236 rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] | 255 rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] |
237 [tokens[next][0].token]; | 256 [tokens[next][0].token]; |
238 } | 257 } |
239 if (t1 != DCT_EOB_TOKEN) { | 258 if (t1 != EOB_TOKEN) { |
240 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); | 259 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); |
241 rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] | 260 rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] |
242 [tokens[next][1].token]; | 261 [tokens[next][1].token]; |
243 } | 262 } |
244 } | 263 } |
245 | 264 |
246 UPDATE_RD_COST(); | 265 UPDATE_RD_COST(); |
247 /* And pick the best. */ | 266 /* And pick the best. */ |
248 best = rd_cost1 < rd_cost0; | 267 best = rd_cost1 < rd_cost0; |
249 base_bits = *(vp9_dct_value_cost_ptr + x); | 268 base_bits = *(vp9_dct_value_cost_ptr + x); |
250 | 269 |
251 if (shortcut) { | 270 if (shortcut) { |
252 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; | 271 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; |
253 d2 = dx * dx; | 272 d2 = dx * dx; |
254 } | 273 } |
255 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); | 274 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); |
256 tokens[i][1].error = d2 + (best ? error1 : error0); | 275 tokens[i][1].error = d2 + (best ? error1 : error0); |
257 tokens[i][1].next = next; | 276 tokens[i][1].next = next; |
258 tokens[i][1].token = best ? t1 : t0; | 277 tokens[i][1].token = best ? t1 : t0; |
259 tokens[i][1].qc = x; | 278 tokens[i][1].qc = x; |
260 best_index[i][1] = best; | 279 best_index[i][1] = best; |
261 /* Finally, make this the new head of the trellis. */ | 280 /* Finally, make this the new head of the trellis. */ |
262 next = i; | 281 next = i; |
263 } else { | 282 } else { |
264 /* There's no choice to make for a zero coefficient, so we don't | 283 /* There's no choice to make for a zero coefficient, so we don't |
265 * add a new trellis node, but we do need to update the costs. | 284 * add a new trellis node, but we do need to update the costs. |
266 */ | 285 */ |
267 band = get_coef_band(band_translate, i + 1); | 286 band = band_translate[i + 1]; |
268 t0 = tokens[next][0].token; | 287 t0 = tokens[next][0].token; |
269 t1 = tokens[next][1].token; | 288 t1 = tokens[next][1].token; |
270 /* Update the cost of each path if we're past the EOB token. */ | 289 /* Update the cost of each path if we're past the EOB token. */ |
271 if (t0 != DCT_EOB_TOKEN) { | 290 if (t0 != EOB_TOKEN) { |
272 tokens[next][0].rate += | 291 tokens[next][0].rate += |
273 mb->token_costs[tx_size][type][ref][band][1][0][t0]; | 292 mb->token_costs[tx_size][type][ref][band][1][0][t0]; |
274 tokens[next][0].token = ZERO_TOKEN; | 293 tokens[next][0].token = ZERO_TOKEN; |
275 } | 294 } |
276 if (t1 != DCT_EOB_TOKEN) { | 295 if (t1 != EOB_TOKEN) { |
277 tokens[next][1].rate += | 296 tokens[next][1].rate += |
278 mb->token_costs[tx_size][type][ref][band][1][0][t1]; | 297 mb->token_costs[tx_size][type][ref][band][1][0][t1]; |
279 tokens[next][1].token = ZERO_TOKEN; | 298 tokens[next][1].token = ZERO_TOKEN; |
280 } | 299 } |
281 best_index[i][0] = best_index[i][1] = 0; | 300 best_index[i][0] = best_index[i][1] = 0; |
282 /* Don't update next, because we didn't add a new node. */ | 301 /* Don't update next, because we didn't add a new node. */ |
283 } | 302 } |
284 } | 303 } |
285 | 304 |
286 /* Now pick the best path through the whole trellis. */ | 305 /* Now pick the best path through the whole trellis. */ |
287 band = get_coef_band(band_translate, i + 1); | 306 band = band_translate[i + 1]; |
288 pt = combine_entropy_contexts(*a, *l); | 307 pt = combine_entropy_contexts(*a, *l); |
289 rate0 = tokens[next][0].rate; | 308 rate0 = tokens[next][0].rate; |
290 rate1 = tokens[next][1].rate; | 309 rate1 = tokens[next][1].rate; |
291 error0 = tokens[next][0].error; | 310 error0 = tokens[next][0].error; |
292 error1 = tokens[next][1].error; | 311 error1 = tokens[next][1].error; |
293 t0 = tokens[next][0].token; | 312 t0 = tokens[next][0].token; |
294 t1 = tokens[next][1].token; | 313 t1 = tokens[next][1].token; |
295 rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0]; | 314 rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0]; |
296 rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1]; | 315 rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1]; |
297 UPDATE_RD_COST(); | 316 UPDATE_RD_COST(); |
298 best = rd_cost1 < rd_cost0; | 317 best = rd_cost1 < rd_cost0; |
299 final_eob = i0 - 1; | 318 final_eob = i0 - 1; |
300 vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2))); | 319 vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2))); |
301 vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2))); | 320 vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2))); |
302 for (i = next; i < eob; i = next) { | 321 for (i = next; i < eob; i = next) { |
303 x = tokens[i][best].qc; | 322 x = tokens[i][best].qc; |
304 if (x) { | 323 if (x) { |
305 final_eob = i; | 324 final_eob = i; |
306 } | 325 } |
307 rc = scan[i]; | 326 rc = scan[i]; |
308 qcoeff_ptr[rc] = x; | 327 qcoeff_ptr[rc] = x; |
309 dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; | 328 dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; |
310 | 329 |
311 next = tokens[i][best].next; | 330 next = tokens[i][best].next; |
312 best = best_index[i][best]; | 331 best = best_index[i][best]; |
313 } | 332 } |
314 final_eob++; | 333 final_eob++; |
315 | 334 |
316 xd->plane[plane].eobs[block] = final_eob; | 335 mb->plane[plane].eobs[block] = final_eob; |
317 *a = *l = (final_eob > 0); | 336 *a = *l = (final_eob > 0); |
318 } | 337 } |
319 | 338 |
320 void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, | 339 void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, |
321 TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { | 340 TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { |
322 int x, y; | 341 int x, y; |
323 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); | 342 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); |
324 optimize_b(mb, plane, block, plane_bsize, | 343 optimize_b(mb, plane, block, plane_bsize, |
325 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); | 344 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); |
326 } | 345 } |
(...skipping 14 matching lines...) Expand all Loading... |
341 } | 360 } |
342 | 361 |
343 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, | 362 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, |
344 TX_SIZE tx_size, void *arg) { | 363 TX_SIZE tx_size, void *arg) { |
345 struct encode_b_args* const args = arg; | 364 struct encode_b_args* const args = arg; |
346 MACROBLOCK* const x = args->x; | 365 MACROBLOCK* const x = args->x; |
347 MACROBLOCKD* const xd = &x->e_mbd; | 366 MACROBLOCKD* const xd = &x->e_mbd; |
348 struct macroblock_plane *const p = &x->plane[plane]; | 367 struct macroblock_plane *const p = &x->plane[plane]; |
349 struct macroblockd_plane *const pd = &xd->plane[plane]; | 368 struct macroblockd_plane *const pd = &xd->plane[plane]; |
350 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 369 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
351 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 370 int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
352 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 371 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
353 const int16_t *scan, *iscan; | 372 const scan_order *scan_order; |
354 uint16_t *eob = &pd->eobs[block]; | 373 uint16_t *eob = &p->eobs[block]; |
355 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 374 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; |
356 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 375 int i, j; |
357 int xoff, yoff; | |
358 int16_t *src_diff; | 376 int16_t *src_diff; |
| 377 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| 378 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
359 | 379 |
360 switch (tx_size) { | 380 switch (tx_size) { |
361 case TX_32X32: | 381 case TX_32X32: |
362 scan = vp9_default_scan_32x32; | 382 scan_order = &vp9_default_scan_orders[TX_32X32]; |
363 iscan = vp9_default_iscan_32x32; | |
364 block >>= 6; | |
365 xoff = 32 * (block & twmask); | |
366 yoff = 32 * (block >> twl); | |
367 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
368 if (x->use_lp32x32fdct) | 383 if (x->use_lp32x32fdct) |
369 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); | 384 vp9_fdct32x32_rd(src_diff, coeff, diff_stride); |
370 else | 385 else |
371 vp9_fdct32x32(src_diff, coeff, bw * 4); | 386 vp9_fdct32x32(src_diff, coeff, diff_stride); |
372 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 387 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
373 p->quant, p->quant_shift, qcoeff, dqcoeff, | 388 p->quant, p->quant_shift, qcoeff, dqcoeff, |
374 pd->dequant, p->zbin_extra, eob, scan, iscan); | 389 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
| 390 scan_order->iscan); |
375 break; | 391 break; |
376 case TX_16X16: | 392 case TX_16X16: |
377 scan = vp9_default_scan_16x16; | 393 scan_order = &vp9_default_scan_orders[TX_16X16]; |
378 iscan = vp9_default_iscan_16x16; | 394 vp9_fdct16x16(src_diff, coeff, diff_stride); |
379 block >>= 4; | |
380 xoff = 16 * (block & twmask); | |
381 yoff = 16 * (block >> twl); | |
382 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
383 vp9_fdct16x16(src_diff, coeff, bw * 4); | |
384 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 395 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
385 p->quant, p->quant_shift, qcoeff, dqcoeff, | 396 p->quant, p->quant_shift, qcoeff, dqcoeff, |
386 pd->dequant, p->zbin_extra, eob, scan, iscan); | 397 pd->dequant, p->zbin_extra, eob, |
| 398 scan_order->scan, scan_order->iscan); |
387 break; | 399 break; |
388 case TX_8X8: | 400 case TX_8X8: |
389 scan = vp9_default_scan_8x8; | 401 scan_order = &vp9_default_scan_orders[TX_8X8]; |
390 iscan = vp9_default_iscan_8x8; | 402 vp9_fdct8x8(src_diff, coeff, diff_stride); |
391 block >>= 2; | |
392 xoff = 8 * (block & twmask); | |
393 yoff = 8 * (block >> twl); | |
394 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
395 vp9_fdct8x8(src_diff, coeff, bw * 4); | |
396 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, | 403 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, |
397 p->quant, p->quant_shift, qcoeff, dqcoeff, | 404 p->quant, p->quant_shift, qcoeff, dqcoeff, |
398 pd->dequant, p->zbin_extra, eob, scan, iscan); | 405 pd->dequant, p->zbin_extra, eob, |
| 406 scan_order->scan, scan_order->iscan); |
399 break; | 407 break; |
400 case TX_4X4: | 408 case TX_4X4: |
401 scan = vp9_default_scan_4x4; | 409 scan_order = &vp9_default_scan_orders[TX_4X4]; |
402 iscan = vp9_default_iscan_4x4; | 410 x->fwd_txm4x4(src_diff, coeff, diff_stride); |
403 xoff = 4 * (block & twmask); | |
404 yoff = 4 * (block >> twl); | |
405 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
406 x->fwd_txm4x4(src_diff, coeff, bw * 4); | |
407 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, | 411 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, |
408 p->quant, p->quant_shift, qcoeff, dqcoeff, | 412 p->quant, p->quant_shift, qcoeff, dqcoeff, |
409 pd->dequant, p->zbin_extra, eob, scan, iscan); | 413 pd->dequant, p->zbin_extra, eob, |
| 414 scan_order->scan, scan_order->iscan); |
410 break; | 415 break; |
411 default: | 416 default: |
412 assert(0); | 417 assert(0); |
413 } | 418 } |
414 } | 419 } |
415 | 420 |
416 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, | 421 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, |
417 TX_SIZE tx_size, void *arg) { | 422 TX_SIZE tx_size, void *arg) { |
418 struct encode_b_args *const args = arg; | 423 struct encode_b_args *const args = arg; |
419 MACROBLOCK *const x = args->x; | 424 MACROBLOCK *const x = args->x; |
420 MACROBLOCKD *const xd = &x->e_mbd; | 425 MACROBLOCKD *const xd = &x->e_mbd; |
421 struct optimize_ctx *const ctx = args->ctx; | 426 struct optimize_ctx *const ctx = args->ctx; |
| 427 struct macroblock_plane *const p = &x->plane[plane]; |
422 struct macroblockd_plane *const pd = &xd->plane[plane]; | 428 struct macroblockd_plane *const pd = &xd->plane[plane]; |
423 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, | |
424 block); | |
425 | |
426 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 429 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
427 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, | 430 int i, j; |
428 pd->dst.buf, pd->dst.stride); | 431 uint8_t *dst; |
| 432 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| 433 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; |
429 | 434 |
430 // TODO(jingning): per transformed block zero forcing only enabled for | 435 // TODO(jingning): per transformed block zero forcing only enabled for |
431 // luma component. will integrate chroma components as well. | 436 // luma component. will integrate chroma components as well. |
432 if (x->zcoeff_blk[tx_size][block] && plane == 0) { | 437 if (x->zcoeff_blk[tx_size][block] && plane == 0) { |
433 int x, y; | 438 p->eobs[block] = 0; |
434 pd->eobs[block] = 0; | 439 ctx->ta[plane][i] = 0; |
435 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); | 440 ctx->tl[plane][j] = 0; |
436 ctx->ta[plane][x] = 0; | |
437 ctx->tl[plane][y] = 0; | |
438 return; | 441 return; |
439 } | 442 } |
440 | 443 |
441 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); | 444 if (!x->skip_recode) |
| 445 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
442 | 446 |
443 if (x->optimize) | 447 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { |
444 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); | 448 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); |
| 449 } else { |
| 450 ctx->ta[plane][i] = p->eobs[block] > 0; |
| 451 ctx->tl[plane][j] = p->eobs[block] > 0; |
| 452 } |
445 | 453 |
446 if (x->skip_encode || pd->eobs[block] == 0) | 454 if (x->skip_encode || p->eobs[block] == 0) |
447 return; | 455 return; |
448 | 456 |
449 switch (tx_size) { | 457 switch (tx_size) { |
450 case TX_32X32: | 458 case TX_32X32: |
451 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 459 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
452 break; | 460 break; |
453 case TX_16X16: | 461 case TX_16X16: |
454 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 462 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
455 break; | 463 break; |
456 case TX_8X8: | 464 case TX_8X8: |
457 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 465 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
458 break; | 466 break; |
459 case TX_4X4: | 467 case TX_4X4: |
460 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 468 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
461 // which is significant (not just an optimization) for the lossless | 469 // which is significant (not just an optimization) for the lossless |
462 // case. | 470 // case. |
463 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 471 xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
464 break; | 472 break; |
465 default: | 473 default: |
466 assert(!"Invalid transform size"); | 474 assert(0 && "Invalid transform size"); |
467 } | 475 } |
468 } | 476 } |
469 | 477 |
470 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, | 478 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, |
471 TX_SIZE tx_size, void *arg) { | 479 TX_SIZE tx_size, void *arg) { |
472 struct encode_b_args *const args = arg; | 480 struct encode_b_args *const args = arg; |
473 MACROBLOCK *const x = args->x; | 481 MACROBLOCK *const x = args->x; |
474 MACROBLOCKD *const xd = &x->e_mbd; | 482 MACROBLOCKD *const xd = &x->e_mbd; |
| 483 struct macroblock_plane *const p = &x->plane[plane]; |
475 struct macroblockd_plane *const pd = &xd->plane[plane]; | 484 struct macroblockd_plane *const pd = &xd->plane[plane]; |
476 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, | |
477 block); | |
478 | |
479 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 485 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
480 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, | 486 int i, j; |
481 pd->dst.buf, pd->dst.stride); | 487 uint8_t *dst; |
| 488 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| 489 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; |
482 | 490 |
483 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); | 491 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); |
484 | 492 |
485 if (pd->eobs[block] == 0) | 493 if (p->eobs[block] == 0) |
486 return; | 494 return; |
487 | 495 |
488 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); | 496 xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); |
489 } | 497 } |
490 | 498 |
491 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { | 499 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { |
492 MACROBLOCKD *const xd = &x->e_mbd; | 500 MACROBLOCKD *const xd = &x->e_mbd; |
493 struct optimize_ctx ctx; | 501 struct optimize_ctx ctx; |
494 struct encode_b_args arg = {x, &ctx}; | 502 struct encode_b_args arg = {x, &ctx}; |
495 | 503 |
496 vp9_subtract_sby(x, bsize); | 504 vp9_subtract_sby(x, bsize); |
497 if (x->optimize) | 505 if (x->optimize) |
498 optimize_init_b(0, bsize, &arg); | 506 optimize_init_b(0, bsize, &arg); |
499 | 507 |
500 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg); | 508 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg); |
501 } | 509 } |
502 | 510 |
503 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { | 511 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { |
504 MACROBLOCKD *const xd = &x->e_mbd; | 512 MACROBLOCKD *const xd = &x->e_mbd; |
505 struct optimize_ctx ctx; | 513 struct optimize_ctx ctx; |
506 struct encode_b_args arg = {x, &ctx}; | 514 struct encode_b_args arg = {x, &ctx}; |
507 | 515 |
508 vp9_subtract_sb(x, bsize); | 516 if (!x->skip_recode) |
| 517 vp9_subtract_sb(x, bsize); |
509 | 518 |
510 if (x->optimize) { | 519 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { |
511 int i; | 520 int i; |
512 for (i = 0; i < MAX_MB_PLANE; ++i) | 521 for (i = 0; i < MAX_MB_PLANE; ++i) |
513 optimize_init_b(i, bsize, &arg); | 522 optimize_init_b(i, bsize, &arg); |
514 } | 523 } |
515 | 524 |
516 foreach_transformed_block(xd, bsize, encode_block, &arg); | 525 foreach_transformed_block(xd, bsize, encode_block, &arg); |
517 } | 526 } |
518 | 527 |
// vp9_encode_block_intra: callback with the (plane, block, plane_bsize,
// tx_size, arg) signature handed to the foreach_transformed_block* iterators.
// For one transform block it calls vp9_predict_intra_block(), then subtracts
// the prediction from the source, forward transforms and quantizes, and, when
// x->skip_encode is clear and the resulting eob is nonzero, adds the inverse
// transform back into dst.
//   plane       - plane index; selects p = x->plane[plane], pd = xd->plane[plane].
//   block       - block index within the plane (offsets the coefficient buffers).
//   plane_bsize - block size; b_width_log2(plane_bsize) gives bwl.
//   tx_size     - transform size; selects the switch case.
//   arg         - struct encode_b_args *; only args->x is read in the visible
//                 code (the args->ctx use is commented out).
// Differences between the columns:
//   - qcoeff and eobs come from pd in OLD, from p in NEW.
//   - The scan/iscan pointer pair becomes a single scan_order pointer.
//   - OLD derives xoff/yoff and dst/src/src_diff inside every case; NEW derives
//     dst/src/src_diff once from txfrm_block_to_raster_xy().
//   - NEW predicts from src (with p->src.stride) when x->skip_encode is set,
//     from dst otherwise; OLD always predicts from dst.
//   - NEW wraps subtract / forward transform / quantize in !x->skip_recode.
// NOTE(review): the end of this function (after the TX_4X4 case) is elided in
// this view.
519 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, | 528 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, |
520 TX_SIZE tx_size, void *arg) { | 529 TX_SIZE tx_size, void *arg) { |
521 struct encode_b_args* const args = arg; | 530 struct encode_b_args* const args = arg; |
522 MACROBLOCK *const x = args->x; | 531 MACROBLOCK *const x = args->x; |
523 MACROBLOCKD *const xd = &x->e_mbd; | 532 MACROBLOCKD *const xd = &x->e_mbd; |
524 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; | 533 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; |
525 struct macroblock_plane *const p = &x->plane[plane]; | 534 struct macroblock_plane *const p = &x->plane[plane]; |
526 struct macroblockd_plane *const pd = &xd->plane[plane]; | 535 struct macroblockd_plane *const pd = &xd->plane[plane]; |
527 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); | 536 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); |
528 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); | 537 int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
529 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | 538 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
530 const int16_t *scan, *iscan; | 539 const scan_order *scan_order; |
531 TX_TYPE tx_type; | 540 TX_TYPE tx_type; |
532 MB_PREDICTION_MODE mode; | 541 MB_PREDICTION_MODE mode; |
// NEW: diff_stride = 4 * (1 << bwl) is the same value OLD spells as bw * 4;
// it is the row stride used for src_diff everywhere below.
533 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; | 542 const int bwl = b_width_log2(plane_bsize); |
534 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; | 543 const int diff_stride = 4 * (1 << bwl); |
535 int xoff, yoff; | |
536 uint8_t *src, *dst; | 544 uint8_t *src, *dst; |
537 int16_t *src_diff; | 545 int16_t *src_diff; |
538 uint16_t *eob = &pd->eobs[block]; | 546 uint16_t *eob = &p->eobs[block]; |
// NEW: (i, j) are filled in by txfrm_block_to_raster_xy() and scaled by 4
// when indexing the dst, src and src_diff buffers.
| 547 int i, j; |
| 548 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); |
| 549 dst = &pd->dst.buf[4 * (j * pd->dst.stride + i)]; |
| 550 src = &p->src.buf[4 * (j * p->src.stride + i)]; |
| 551 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; |
539 | 552 |
// extend_for_intra() is called only when either edge distance is negative
// (presumably the block crosses the frame edge; confirm against the
// definition of mb_to_right_edge / mb_to_bottom_edge).
540 if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) | 553 if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) |
541 extend_for_intra(xd, plane_bsize, plane, block, tx_size); | 554 extend_for_intra(xd, plane_bsize, plane, i, j); |
542 | 555 |
543 // if (x->optimize) | 556 // if (x->optimize) |
544 // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); | 557 // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); |
545 | 558 |
// In every NEW case below, when x->skip_recode is set nothing writes *eob, so
// the final eob test uses whatever p->eobs[block] already holds.
546 switch (tx_size) { | 559 switch (tx_size) { |
// 32x32: default scan order, predictor gets block >> 6, 1024 coefficients
// quantized. vp9_fdct32x32_rd is used when x->use_lp32x32fdct is set,
// vp9_fdct32x32 otherwise.
547 case TX_32X32: | 560 case TX_32X32: |
548 scan = vp9_default_scan_32x32; | 561 scan_order = &vp9_default_scan_orders[TX_32X32]; |
549 iscan = vp9_default_iscan_32x32; | |
550 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 562 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
551 block >>= 6; | 563 vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode, |
552 xoff = 32 * (block & twmask); | 564 x->skip_encode ? src : dst, |
553 yoff = 32 * (block >> twl); | 565 x->skip_encode ? p->src.stride : pd->dst.stride, |
554 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 566 dst, pd->dst.stride); |
555 src = p->src.buf + yoff * p->src.stride + xoff; | 567 if (!x->skip_recode) { |
556 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 568 vp9_subtract_block(32, 32, src_diff, diff_stride, |
557 vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, | 569 src, p->src.stride, dst, pd->dst.stride); |
558 dst, pd->dst.stride, dst, pd->dst.stride); | 570 if (x->use_lp32x32fdct) |
559 vp9_subtract_block(32, 32, src_diff, bw * 4, | 571 vp9_fdct32x32_rd(src_diff, coeff, diff_stride); |
560 src, p->src.stride, dst, pd->dst.stride); | 572 else |
561 if (x->use_lp32x32fdct) | 573 vp9_fdct32x32(src_diff, coeff, diff_stride); |
562 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); | 574 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, |
563 else | 575 p->quant, p->quant_shift, qcoeff, dqcoeff, |
564 vp9_fdct32x32(src_diff, coeff, bw * 4); | 576 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
565 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, | 577 scan_order->iscan); |
566 p->quant, p->quant_shift, qcoeff, dqcoeff, | 578 } |
567 pd->dequant, p->zbin_extra, eob, scan, iscan); | |
568 if (!x->skip_encode && *eob) | 579 if (!x->skip_encode && *eob) |
569 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); | 580 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); |
570 break; | 581 break; |
// 16x16: transform type from get_tx_type_16x16() picks the scan order and
// the hybrid transform; predictor gets block >> 4, 256 coefficients quantized.
571 case TX_16X16: | 582 case TX_16X16: |
572 tx_type = get_tx_type_16x16(pd->plane_type, xd); | 583 tx_type = get_tx_type_16x16(pd->plane_type, xd); |
573 scan = get_scan_16x16(tx_type); | 584 scan_order = &vp9_scan_orders[TX_16X16][tx_type]; |
574 iscan = get_iscan_16x16(tx_type); | |
575 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 585 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
576 block >>= 4; | 586 vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, |
577 xoff = 16 * (block & twmask); | 587 x->skip_encode ? src : dst, |
578 yoff = 16 * (block >> twl); | 588 x->skip_encode ? p->src.stride : pd->dst.stride, |
579 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 589 dst, pd->dst.stride); |
580 src = p->src.buf + yoff * p->src.stride + xoff; | 590 if (!x->skip_recode) { |
581 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 591 vp9_subtract_block(16, 16, src_diff, diff_stride, |
582 vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, | 592 src, p->src.stride, dst, pd->dst.stride); |
583 dst, pd->dst.stride, dst, pd->dst.stride); | 593 vp9_fht16x16(tx_type, src_diff, coeff, diff_stride); |
584 vp9_subtract_block(16, 16, src_diff, bw * 4, | 594 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, |
585 src, p->src.stride, dst, pd->dst.stride); | 595 p->quant, p->quant_shift, qcoeff, dqcoeff, |
586 vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); | 596 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
587 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, | 597 scan_order->iscan); |
588 p->quant, p->quant_shift, qcoeff, dqcoeff, | 598 } |
589 pd->dequant, p->zbin_extra, eob, scan, iscan); | |
590 if (!x->skip_encode && *eob) | 599 if (!x->skip_encode && *eob) |
591 vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); | 600 vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
592 break; | 601 break; |
// 8x8: same shape as the 16x16 case with get_tx_type_8x8(), block >> 2 and
// 64 coefficients.
593 case TX_8X8: | 602 case TX_8X8: |
594 tx_type = get_tx_type_8x8(pd->plane_type, xd); | 603 tx_type = get_tx_type_8x8(pd->plane_type, xd); |
595 scan = get_scan_8x8(tx_type); | 604 scan_order = &vp9_scan_orders[TX_8X8][tx_type]; |
596 iscan = get_iscan_8x8(tx_type); | |
597 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 605 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
598 block >>= 2; | 606 vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, |
599 xoff = 8 * (block & twmask); | 607 x->skip_encode ? src : dst, |
600 yoff = 8 * (block >> twl); | 608 x->skip_encode ? p->src.stride : pd->dst.stride, |
601 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | 609 dst, pd->dst.stride); |
602 src = p->src.buf + yoff * p->src.stride + xoff; | 610 if (!x->skip_recode) { |
603 src_diff = p->src_diff + 4 * bw * yoff + xoff; | 611 vp9_subtract_block(8, 8, src_diff, diff_stride, |
604 vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, | 612 src, p->src.stride, dst, pd->dst.stride); |
605 dst, pd->dst.stride, dst, pd->dst.stride); | 613 vp9_fht8x8(tx_type, src_diff, coeff, diff_stride); |
606 vp9_subtract_block(8, 8, src_diff, bw * 4, | 614 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, |
607 src, p->src.stride, dst, pd->dst.stride); | 615 p->quant_shift, qcoeff, dqcoeff, |
608 vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); | 616 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
609 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, | 617 scan_order->iscan); |
610 p->quant_shift, qcoeff, dqcoeff, | 618 } |
611 pd->dequant, p->zbin_extra, eob, scan, iscan); | |
612 if (!x->skip_encode && *eob) | 619 if (!x->skip_encode && *eob) |
613 vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); | 620 vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); |
614 break; | 621 break; |
// 4x4: block is used unshifted. For plane 0 with sb_type < BLOCK_8X8 the mode
// comes from the per-block bmi[block] entry, otherwise from mbmi. The forward
// transform is vp9_short_fht4x4 for non-DCT types and x->fwd_txm4x4 for
// DCT_DCT; 16 coefficients are quantized.
615 case TX_4X4: | 622 case TX_4X4: |
616 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); | 623 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); |
617 scan = get_scan_4x4(tx_type); | 624 scan_order = &vp9_scan_orders[TX_4X4][tx_type]; |
618 iscan = get_iscan_4x4(tx_type); | |
619 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) | 625 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) |
620 mode = xd->mi_8x8[0]->bmi[block].as_mode; | 626 mode = xd->mi_8x8[0]->bmi[block].as_mode; |
621 else | 627 else |
622 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; | 628 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; |
623 | 629 |
624 xoff = 4 * (block & twmask); | |
625 yoff = 4 * (block >> twl); | |
626 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; | |
627 src = p->src.buf + yoff * p->src.stride + xoff; | |
628 src_diff = p->src_diff + 4 * bw * yoff + xoff; | |
629 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, | 630 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, |
630 dst, pd->dst.stride, dst, pd->dst.stride); | 631 x->skip_encode ? src : dst, |
631 vp9_subtract_block(4, 4, src_diff, bw * 4, | 632 x->skip_encode ? p->src.stride : pd->dst.stride, |
632 src, p->src.stride, dst, pd->dst.stride); | 633 dst, pd->dst.stride); |
633 if (tx_type != DCT_DCT) | 634 |
634 vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); | 635 if (!x->skip_recode) { |
635 else | 636 vp9_subtract_block(4, 4, src_diff, diff_stride, |
636 x->fwd_txm4x4(src_diff, coeff, bw * 4); | 637 src, p->src.stride, dst, pd->dst.stride); |
637 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, | 638 if (tx_type != DCT_DCT) |
638 p->quant_shift, qcoeff, dqcoeff, | 639 vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type); |
639 pd->dequant, p->zbin_extra, eob, scan, iscan); | 640 else |
| 641 x->fwd_txm4x4(src_diff, coeff, diff_stride); |
| 642 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, |
| 643 p->quant_shift, qcoeff, dqcoeff, |
| 644 pd->dequant, p->zbin_extra, eob, scan_order->scan, |
| 645 scan_order->iscan); |
| 646 } |
| 647 |
640 if (!x->skip_encode && *eob) { | 648 if (!x->skip_encode && *eob) { |
641 if (tx_type == DCT_DCT) | 649 if (tx_type == DCT_DCT) |
642 // this is like vp9_short_idct4x4 but has a special case around eob<=1 | 650 // this is like vp9_short_idct4x4 but has a special case around eob<=1 |
643 // which is significant (not just an optimization) for the lossless | 651 // which is significant (not just an optimization) for the lossless |
644 // case. | 652 // case. |
645 xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); | 653 xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); |
646 else | 654 else |
647 vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); | 655 vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); |
648 } | 656 } |
649 break; | 657 break; |
(...skipping 10 matching lines...) Expand all Loading... |
660 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, | 668 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, |
661 &arg); | 669 &arg); |
662 } | 670 } |
// vp9_encode_intra_block_uv: runs vp9_encode_block_intra on every transformed
// block visited by foreach_transformed_block_uv() for bsize (OLD and NEW
// columns are identical).
//   x     - macroblock state; x->e_mbd is handed to the block iterator.
//   bsize - block size forwarded to the iterator.
// ctx is declared without an initializer and only its address is stored in
// arg; nothing in this function writes to it.
663 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { | 671 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { |
664 MACROBLOCKD* const xd = &x->e_mbd; | 672 MACROBLOCKD* const xd = &x->e_mbd; |
665 struct optimize_ctx ctx; | 673 struct optimize_ctx ctx; |
666 struct encode_b_args arg = {x, &ctx}; | 674 struct encode_b_args arg = {x, &ctx}; |
667 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); | 675 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); |
668 } | 676 } |
669 | 677 |
// vp9_encode_intra (present only in the NEW column): forces the mode info at
// xd->mi_8x8[0] to DC_PRED with ref_frame[0] = INTRA_FRAME, picks a transform
// size, calls vp9_encode_intra_block_y(x, mbmi->sb_type) and returns
// vp9_get_mb_ss() of the plane-0 src_diff buffer.
//   x              - macroblock state; modified as described below.
//   use_16x16_pred - nonzero: TX_16X16 when sb_type >= BLOCK_16X16, otherwise
//                    TX_8X8; zero: TX_4X4.
// Side effects: clears x->skip_encode and overwrites mode, ref_frame[0] and
// tx_size in the mode info.
| 678 int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) { |
| 679 MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi; |
| 680 x->skip_encode = 0; |
| 681 mbmi->mode = DC_PRED; |
| 682 mbmi->ref_frame[0] = INTRA_FRAME; |
| 683 mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16 |
| 684 : TX_8X8) |
| 685 : TX_4X4; |
| 686 vp9_encode_intra_block_y(x, mbmi->sb_type); |
| 687 return vp9_get_mb_ss(x->plane[0].src_diff); |
| 688 } |
OLD | NEW |