OLD | NEW |
1 /* | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 * | 4 * |
5 * This file is part of FFmpeg. | 5 * This file is part of FFmpeg. |
6 * | 6 * |
7 * FFmpeg is free software; you can redistribute it and/or | 7 * FFmpeg is free software; you can redistribute it and/or |
8 * modify it under the terms of the GNU Lesser General Public | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | 9 * License as published by the Free Software Foundation; either |
10 * version 2.1 of the License, or (at your option) any later version. | 10 * version 2.1 of the License, or (at your option) any later version. |
(...skipping 10 matching lines...)
21 | 21 |
22 /** | 22 /** |
23 * @file libavcodec/h264.h | 23 * @file libavcodec/h264.h |
24 * H.264 / AVC / MPEG4 part10 codec. | 24 * H.264 / AVC / MPEG4 part10 codec. |
25 * @author Michael Niedermayer <michaelni@gmx.at> | 25 * @author Michael Niedermayer <michaelni@gmx.at> |
26 */ | 26 */ |
27 | 27 |
28 #ifndef AVCODEC_H264_H | 28 #ifndef AVCODEC_H264_H |
29 #define AVCODEC_H264_H | 29 #define AVCODEC_H264_H |
30 | 30 |
| 31 #include "libavutil/intreadwrite.h" |
31 #include "dsputil.h" | 32 #include "dsputil.h" |
32 #include "cabac.h" | 33 #include "cabac.h" |
33 #include "mpegvideo.h" | 34 #include "mpegvideo.h" |
34 #include "h264pred.h" | 35 #include "h264pred.h" |
35 #include "rectangle.h" | 36 #include "rectangle.h" |
36 | 37 |
37 #define interlaced_dct interlaced_dct_is_a_bad_name | 38 #define interlaced_dct interlaced_dct_is_a_bad_name |
38 #define mb_intra mb_intra_is_not_initialized_see_mb_type | 39 #define mb_intra mb_intra_is_not_initialized_see_mb_type |
39 | 40 |
40 #define LUMA_DC_BLOCK_INDEX 25 | 41 #define LUMA_DC_BLOCK_INDEX 25 |
(...skipping 12 matching lines...)
53 #define MAX_MMCO_COUNT 66 | 54 #define MAX_MMCO_COUNT 66 |
54 | 55 |
55 #define MAX_DELAYED_PIC_COUNT 16 | 56 #define MAX_DELAYED_PIC_COUNT 16 |
56 | 57 |
57 /* Compiling in interlaced support reduces the speed | 58 /* Compiling in interlaced support reduces the speed |
58 * of progressive decoding by about 2%. */ | 59 * of progressive decoding by about 2%. */ |
59 #define ALLOW_INTERLACE | 60 #define ALLOW_INTERLACE |
60 | 61 |
61 #define ALLOW_NOCHROMA | 62 #define ALLOW_NOCHROMA |
62 | 63 |
| 64 #define FMO 0 |
| 65 |
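The new FMO macro above is a compile-time switch: with it defined to 0, the per-neighbour slice checks guarded by if(FMO) in fill_decode_neighbors() further down compile away entirely. A minimal hedged illustration of that pattern (macro and function names here are invented for the example, not part of the patch):

    #define EXAMPLE_FMO 0
    static void example_neighbor_check(int *type, int same_slice){
        if(EXAMPLE_FMO){
            /* dead code when EXAMPLE_FMO is 0; the optimizer removes the whole branch */
            if(!same_slice)
                *type = 0;
        }
    }
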
63 /** | 66 /** |
64 * The maximum number of slices supported by the decoder. | 67 * The maximum number of slices supported by the decoder. |
65 * must be a power of 2 | 68 * must be a power of 2 |
66 */ | 69 */ |
67 #define MAX_SLICES 16 | 70 #define MAX_SLICES 16 |
68 | 71 |
69 #ifdef ALLOW_INTERLACE | 72 #ifdef ALLOW_INTERLACE |
70 #define MB_MBAFF h->mb_mbaff | 73 #define MB_MBAFF h->mb_mbaff |
71 #define MB_FIELD h->mb_field_decoding_flag | 74 #define MB_FIELD h->mb_field_decoding_flag |
72 #define FRAME_MBAFF h->mb_aff_frame | 75 #define FRAME_MBAFF h->mb_aff_frame |
(...skipping 179 matching lines...)
252 MMCOOpcode opcode; | 255 MMCOOpcode opcode; |
253 int short_pic_num; ///< pic_num without wrapping (pic_num & max_pic_num) | 256 int short_pic_num; ///< pic_num without wrapping (pic_num & max_pic_num) |
254 int long_arg; ///< index, pic_num, or num long refs depending on opcode | 257 int long_arg; ///< index, pic_num, or num long refs depending on opcode |
255 } MMCO; | 258 } MMCO; |
256 | 259 |
257 /** | 260 /** |
258 * H264Context | 261 * H264Context |
259 */ | 262 */ |
260 typedef struct H264Context{ | 263 typedef struct H264Context{ |
261 MpegEncContext s; | 264 MpegEncContext s; |
262 int nal_ref_idc; | |
263 int nal_unit_type; | |
264 uint8_t *rbsp_buffer[2]; | |
265 unsigned int rbsp_buffer_size[2]; | |
266 | |
267 /** | |
268 * Used to parse AVC variant of h264 | |
269 */ | |
270 int is_avc; ///< this flag is != 0 if codec is avc1 | |
271 int got_avcC; ///< flag used to parse avcC data only once | |
272 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) | |
273 | |
274 int chroma_qp[2]; //QPc | 265 int chroma_qp[2]; //QPc |
275 | 266 |
276 int qp_thresh; ///< QP threshold to skip loopfilter | 267 int qp_thresh; ///< QP threshold to skip loopfilter |
277 | 268 |
278 int prev_mb_skipped; | 269 int prev_mb_skipped; |
279 int next_mb_skipped; | 270 int next_mb_skipped; |
280 | 271 |
281 //prediction stuff | 272 //prediction stuff |
282 int chroma_pred_mode; | 273 int chroma_pred_mode; |
283 int intra16x16_pred_mode; | 274 int intra16x16_pred_mode; |
284 | 275 |
| 276 int topleft_mb_xy; |
285 int top_mb_xy; | 277 int top_mb_xy; |
| 278 int topright_mb_xy; |
286 int left_mb_xy[2]; | 279 int left_mb_xy[2]; |
287 | 280 |
| 281 int topleft_type; |
288 int top_type; | 282 int top_type; |
| 283 int topright_type; |
289 int left_type[2]; | 284 int left_type[2]; |
290 | 285 |
| 286 const uint8_t * left_block; |
| 287 int topleft_partition; |
| 288 |
291 int8_t intra4x4_pred_mode_cache[5*8]; | 289 int8_t intra4x4_pred_mode_cache[5*8]; |
292 int8_t (*intra4x4_pred_mode)[8]; | 290 int8_t (*intra4x4_pred_mode); |
293 H264PredContext hpc; | 291 H264PredContext hpc; |
294 unsigned int topleft_samples_available; | 292 unsigned int topleft_samples_available; |
295 unsigned int top_samples_available; | 293 unsigned int top_samples_available; |
296 unsigned int topright_samples_available; | 294 unsigned int topright_samples_available; |
297 unsigned int left_samples_available; | 295 unsigned int left_samples_available; |
298 uint8_t (*top_borders[2])[16+2*8]; | 296 uint8_t (*top_borders[2])[16+2*8]; |
299 uint8_t left_border[2*(17+2*9)]; | |
300 | 297 |
301 /** | 298 /** |
302 * non zero coeff count cache. | 299 * non zero coeff count cache. |
303 * is 64 if not available. | 300 * is 64 if not available. |
304 */ | 301 */ |
305 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8]; | 302 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8]; |
306 | 303 |
307 /* | 304 /* |
308 .UU.YYYY | 305 .UU.YYYY |
309 .UU.YYYY | 306 .UU.YYYY |
310 .vv.YYYY | 307 .vv.YYYY |
311 .VV.YYYY | 308 .VV.YYYY |
312 */ | 309 */ |
313 uint8_t (*non_zero_count)[32]; | 310 uint8_t (*non_zero_count)[32]; |
314 | 311 |
315 /** | 312 /** |
316 * Motion vector cache. | 313 * Motion vector cache. |
317 */ | 314 */ |
318 DECLARE_ALIGNED_16(int16_t, mv_cache)[2][5*8][2]; | 315 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2]; |
319 DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8]; | 316 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8]; |
320 #define LIST_NOT_USED -1 //FIXME rename? | 317 #define LIST_NOT_USED -1 //FIXME rename? |
321 #define PART_NOT_AVAILABLE -2 | 318 #define PART_NOT_AVAILABLE -2 |
322 | 319 |
323 /** | 320 /** |
324 * is 1 if the specific list MV&references are set to 0,0,-2. | 321 * is 1 if the specific list MV&references are set to 0,0,-2. |
325 */ | 322 */ |
326 int mv_cache_clean[2]; | 323 int mv_cache_clean[2]; |
327 | 324 |
328 /** | 325 /** |
329 * number of neighbors (top and/or left) that used 8x8 dct | 326 * number of neighbors (top and/or left) that used 8x8 dct |
330 */ | 327 */ |
331 int neighbor_transform_size; | 328 int neighbor_transform_size; |
332 | 329 |
333 /** | 330 /** |
334 * block_offset[ 0..23] for frame macroblocks | 331 * block_offset[ 0..23] for frame macroblocks |
335 * block_offset[24..47] for field macroblocks | 332 * block_offset[24..47] for field macroblocks |
336 */ | 333 */ |
337 int block_offset[2*(16+8)]; | 334 int block_offset[2*(16+8)]; |
338 | 335 |
339 uint32_t *mb2b_xy; //FIXME are these 4 a good idea? | 336 uint32_t *mb2b_xy; //FIXME are these 4 a good idea? |
340 uint32_t *mb2b8_xy; | 337 uint32_t *mb2br_xy; |
341 int b_stride; //FIXME use s->b4_stride | 338 int b_stride; //FIXME use s->b4_stride |
342 int b8_stride; | |
343 | 339 |
344 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff | 340 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff |
345 int mb_uvlinesize; | 341 int mb_uvlinesize; |
346 | 342 |
347 int emu_edge_width; | 343 int emu_edge_width; |
348 int emu_edge_height; | 344 int emu_edge_height; |
349 | 345 |
350 int halfpel_flag; | |
351 int thirdpel_flag; | |
352 | |
353 int unknown_svq3_flag; | |
354 int next_slice_index; | |
355 | |
356 SPS *sps_buffers[MAX_SPS_COUNT]; | |
357 SPS sps; ///< current sps | 346 SPS sps; ///< current sps |
358 | 347 |
359 PPS *pps_buffers[MAX_PPS_COUNT]; | |
360 /** | 348 /** |
361 * current pps | 349 * current pps |
362 */ | 350 */ |
363 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? | 351 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? |
364 | 352 |
365 uint32_t dequant4_buffer[6][52][16]; | 353 uint32_t dequant4_buffer[6][52][16]; //FIXME should these be moved down? |
366 uint32_t dequant8_buffer[2][52][64]; | 354 uint32_t dequant8_buffer[2][52][64]; |
367 uint32_t (*dequant4_coeff[6])[16]; | 355 uint32_t (*dequant4_coeff[6])[16]; |
368 uint32_t (*dequant8_coeff[2])[64]; | 356 uint32_t (*dequant8_coeff[2])[64]; |
369 int dequant_coeff_pps; ///< reinit tables when pps changes | |
370 | 357 |
371 int slice_num; | 358 int slice_num; |
372 uint16_t *slice_table_base; | |
373 uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 | 359 uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 |
374 int slice_type; | 360 int slice_type; |
375 int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) | 361 int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) |
376 int slice_type_fixed; | 362 int slice_type_fixed; |
377 | 363 |
378 //interlacing specific flags | 364 //interlacing specific flags |
379 int mb_aff_frame; | 365 int mb_aff_frame; |
380 int mb_field_decoding_flag; | 366 int mb_field_decoding_flag; |
381 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag | 367 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag |
382 | 368 |
383 DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4]; | 369 DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; |
384 | |
385 //POC stuff | |
386 int poc_lsb; | |
387 int poc_msb; | |
388 int delta_poc_bottom; | |
389 int delta_poc[2]; | |
390 int frame_num; | |
391 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 | |
392 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 | |
393 int frame_num_offset; ///< for POC type 2 | |
394 int prev_frame_num_offset; ///< for POC type 2 | |
395 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 | |
396 | |
397 /** | |
398 * frame_num for frames or 2*frame_num+1 for field pics. | |
399 */ | |
400 int curr_pic_num; | |
401 | |
402 /** | |
403 * max_frame_num or 2*max_frame_num for field pics. | |
404 */ | |
405 int max_pic_num; | |
406 | 370 |
407 //Weighted pred stuff | 371 //Weighted pred stuff |
408 int use_weight; | 372 int use_weight; |
409 int use_weight_chroma; | 373 int use_weight_chroma; |
410 int luma_log2_weight_denom; | 374 int luma_log2_weight_denom; |
411 int chroma_log2_weight_denom; | 375 int chroma_log2_weight_denom; |
412 int luma_weight[2][48]; | 376 //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss |
413 int luma_offset[2][48]; | 377 int luma_weight[48][2][2]; |
414 int chroma_weight[2][48][2]; | 378 int chroma_weight[48][2][2][2]; |
415 int chroma_offset[2][48][2]; | |
416 int implicit_weight[48][48]; | 379 int implicit_weight[48][48]; |
417 | 380 |
418 //deblock | |
419 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 | |
420 int slice_alpha_c0_offset; | |
421 int slice_beta_offset; | |
422 | |
423 int redundant_pic_count; | |
424 | |
425 int direct_spatial_mv_pred; | 381 int direct_spatial_mv_pred; |
| 382 int col_parity; |
| 383 int col_fieldoff; |
426 int dist_scale_factor[16]; | 384 int dist_scale_factor[16]; |
427 int dist_scale_factor_field[2][32]; | 385 int dist_scale_factor_field[2][32]; |
428 int map_col_to_list0[2][16+32]; | 386 int map_col_to_list0[2][16+32]; |
429 int map_col_to_list0_field[2][2][16+32]; | 387 int map_col_to_list0_field[2][2][16+32]; |
430 | 388 |
431 /** | 389 /** |
432 * num_ref_idx_l0/1_active_minus1 + 1 | 390 * num_ref_idx_l0/1_active_minus1 + 1 |
433 */ | 391 */ |
434 uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type | 392 uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type |
435 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode | 393 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode |
436 unsigned int list_count; | 394 unsigned int list_count; |
437 Picture *short_ref[32]; | |
438 Picture *long_ref[32]; | |
439 Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture | |
440 Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. | 395 Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. |
441 Reordered version of default_ref_list | 396 Reordered version of default_ref_list |
442 according to picture reordering in slice header */ | 397 according to picture reordering in slice header */ |
443 int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 | 398 int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 |
444 Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? | |
445 Picture *next_output_pic; | |
446 int outputed_poc; | |
447 int next_outputed_poc; | |
448 | |
449 /** | |
450 * memory management control operations buffer. | |
451 */ | |
452 MMCO mmco[MAX_MMCO_COUNT]; | |
453 int mmco_index; | |
454 | |
455 int long_ref_count; ///< number of actual long term references | |
456 int short_ref_count; ///< number of actual short term references | |
457 | 399 |
458 //data partitioning | 400 //data partitioning |
459 GetBitContext intra_gb; | 401 GetBitContext intra_gb; |
460 GetBitContext inter_gb; | 402 GetBitContext inter_gb; |
461 GetBitContext *intra_gb_ptr; | 403 GetBitContext *intra_gb_ptr; |
462 GetBitContext *inter_gb_ptr; | 404 GetBitContext *inter_gb_ptr; |
463 | 405 |
464 DECLARE_ALIGNED_16(DCTELEM, mb)[16*24]; | 406 DECLARE_ALIGNED(16, DCTELEM, mb)[16*24]; |
465 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb | 407 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb |
466 | 408 |
467 /** | 409 /** |
468 * Cabac | 410 * Cabac |
469 */ | 411 */ |
470 CABACContext cabac; | 412 CABACContext cabac; |
471 uint8_t cabac_state[460]; | 413 uint8_t cabac_state[460]; |
472 int cabac_init_idc; | |
473 | 414 |
474 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ | 415 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ |
475 uint16_t *cbp_table; | 416 uint16_t *cbp_table; |
476 int cbp; | 417 int cbp; |
477 int top_cbp; | 418 int top_cbp; |
478 int left_cbp; | 419 int left_cbp; |
479 /* chroma_pred_mode for i4x4 or i16x16, else 0 */ | 420 /* chroma_pred_mode for i4x4 or i16x16, else 0 */ |
480 uint8_t *chroma_pred_mode_table; | 421 uint8_t *chroma_pred_mode_table; |
481 int last_qscale_diff; | 422 int last_qscale_diff; |
482 int16_t (*mvd_table[2])[2]; | 423 uint8_t (*mvd_table[2])[2]; |
483 DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2]; | 424 DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2]; |
484 uint8_t *direct_table; | 425 uint8_t *direct_table; |
485 uint8_t direct_cache[5*8]; | 426 uint8_t direct_cache[5*8]; |
486 | 427 |
487 uint8_t zigzag_scan[16]; | 428 uint8_t zigzag_scan[16]; |
488 uint8_t zigzag_scan8x8[64]; | 429 uint8_t zigzag_scan8x8[64]; |
489 uint8_t zigzag_scan8x8_cavlc[64]; | 430 uint8_t zigzag_scan8x8_cavlc[64]; |
490 uint8_t field_scan[16]; | 431 uint8_t field_scan[16]; |
491 uint8_t field_scan8x8[64]; | 432 uint8_t field_scan8x8[64]; |
492 uint8_t field_scan8x8_cavlc[64]; | 433 uint8_t field_scan8x8_cavlc[64]; |
493 const uint8_t *zigzag_scan_q0; | 434 const uint8_t *zigzag_scan_q0; |
494 const uint8_t *zigzag_scan8x8_q0; | 435 const uint8_t *zigzag_scan8x8_q0; |
495 const uint8_t *zigzag_scan8x8_cavlc_q0; | 436 const uint8_t *zigzag_scan8x8_cavlc_q0; |
496 const uint8_t *field_scan_q0; | 437 const uint8_t *field_scan_q0; |
497 const uint8_t *field_scan8x8_q0; | 438 const uint8_t *field_scan8x8_q0; |
498 const uint8_t *field_scan8x8_cavlc_q0; | 439 const uint8_t *field_scan8x8_cavlc_q0; |
499 | 440 |
500 int x264_build; | 441 int x264_build; |
501 | 442 |
| 443 int mb_xy; |
| 444 |
| 445 int is_complex; |
| 446 |
| 447 //deblock |
| 448 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 |
| 449 int slice_alpha_c0_offset; |
| 450 int slice_beta_offset; |
| 451 |
| 452 //============================================================= |
| 453 //Things below are not used in the MB or more inner code |
| 454 |
| 455 int nal_ref_idc; |
| 456 int nal_unit_type; |
| 457 uint8_t *rbsp_buffer[2]; |
| 458 unsigned int rbsp_buffer_size[2]; |
| 459 |
| 460 /** |
| 461 * Used to parse AVC variant of h264 |
| 462 */ |
| 463 int is_avc; ///< this flag is != 0 if codec is avc1 |
| 464 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) |
| 465 |
| 466 SPS *sps_buffers[MAX_SPS_COUNT]; |
| 467 PPS *pps_buffers[MAX_PPS_COUNT]; |
| 468 |
| 469 int dequant_coeff_pps; ///< reinit tables when pps changes |
| 470 |
| 471 uint16_t *slice_table_base; |
| 472 |
| 473 |
| 474 //POC stuff |
| 475 int poc_lsb; |
| 476 int poc_msb; |
| 477 int delta_poc_bottom; |
| 478 int delta_poc[2]; |
| 479 int frame_num; |
| 480 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 |
| 481 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 |
| 482 int frame_num_offset; ///< for POC type 2 |
| 483 int prev_frame_num_offset; ///< for POC type 2 |
| 484 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 |
| 485 |
| 486 /** |
| 487 * frame_num for frames or 2*frame_num+1 for field pics. |
| 488 */ |
| 489 int curr_pic_num; |
| 490 |
| 491 /** |
| 492 * max_frame_num or 2*max_frame_num for field pics. |
| 493 */ |
| 494 int max_pic_num; |
| 495 |
| 496 int redundant_pic_count; |
| 497 |
| 498 Picture *short_ref[32]; |
| 499 Picture *long_ref[32]; |
| 500 Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture |
| 501 Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? |
| 502 Picture *next_output_pic; |
| 503 int outputed_poc; |
| 504 int next_outputed_poc; |
| 505 |
| 506 /** |
| 507 * memory management control operations buffer. |
| 508 */ |
| 509 MMCO mmco[MAX_MMCO_COUNT]; |
| 510 int mmco_index; |
| 511 |
| 512 int long_ref_count; ///< number of actual long term references |
| 513 int short_ref_count; ///< number of actual short term references |
| 514 |
| 515 int cabac_init_idc; |
| 516 |
502 /** | 517 /** |
503 * @defgroup multithreading Members for slice based multithreading | 518 * @defgroup multithreading Members for slice based multithreading |
504 * @{ | 519 * @{ |
505 */ | 520 */ |
506 struct H264Context *thread_context[MAX_THREADS]; | 521 struct H264Context *thread_context[MAX_THREADS]; |
507 | 522 |
508 /** | 523 /** |
509 * current slice number, used to initialize slice_num of each thread/context | 524 * current slice number, used to initialize slice_num of each thread/context |
510 */ | 525 */ |
511 int current_slice; | 526 int current_slice; |
512 | 527 |
513 /** | 528 /** |
514 * Max number of threads / contexts. | 529 * Max number of threads / contexts. |
515 * This is equal to AVCodecContext.thread_count unless | 530 * This is equal to AVCodecContext.thread_count unless |
516 * multithreaded decoding is impossible, in which case it is | 531 * multithreaded decoding is impossible, in which case it is |
517 * reduced to 1. | 532 * reduced to 1. |
518 */ | 533 */ |
519 int max_contexts; | 534 int max_contexts; |
520 | 535 |
521 /** | 536 /** |
522 * 1 if the single thread fallback warning has already been | 537 * 1 if the single thread fallback warning has already been |
523 * displayed, 0 otherwise. | 538 * displayed, 0 otherwise. |
524 */ | 539 */ |
525 int single_decode_warning; | 540 int single_decode_warning; |
526 | 541 |
527 int last_slice_type; | 542 int last_slice_type; |
528 /** @} */ | 543 /** @} */ |
529 | 544 |
530 int mb_xy; | |
531 | |
532 uint32_t svq3_watermark_key; | |
533 | |
534 /** | 545 /** |
535 * pic_struct in picture timing SEI message | 546 * pic_struct in picture timing SEI message |
536 */ | 547 */ |
537 SEI_PicStructType sei_pic_struct; | 548 SEI_PicStructType sei_pic_struct; |
538 | 549 |
539 /** | 550 /** |
540 * Complement sei_pic_struct | 551 * Complement sei_pic_struct |
541 * SEI_PIC_STRUCT_TOP_BOTTOM and SEI_PIC_STRUCT_BOTTOM_TOP indicate interlaced frames. | 552 * SEI_PIC_STRUCT_TOP_BOTTOM and SEI_PIC_STRUCT_BOTTOM_TOP indicate interlaced frames. |
542 * However, soft telecined frames may have these values. | 553 * However, soft telecined frames may have these values. |
543 * This is used in an attempt to flag soft telecine progressive. | 554 * This is used in an attempt to flag soft telecine progressive. |
(...skipping 19 matching lines...)
563 | 574 |
564 /** | 575 /** |
565 * recovery_frame_cnt from SEI message | 576 * recovery_frame_cnt from SEI message |
566 * | 577 * |
567 * Set to -1 if no recovery point SEI message found or to number of frames | 578 * Set to -1 if no recovery point SEI message found or to number of frames |
568 * before playback synchronizes. Frames having recovery point are key | 579 * before playback synchronizes. Frames having recovery point are key |
569 * frames. | 580 * frames. |
570 */ | 581 */ |
571 int sei_recovery_frame_cnt; | 582 int sei_recovery_frame_cnt; |
572 | 583 |
573 int is_complex; | |
574 | |
575 int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag | 584 int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag |
576 int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag | 585 int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag |
577 | 586 |
578 // Timestamp stuff | 587 // Timestamp stuff |
579 int sei_buffering_period_present; ///< Buffering period SEI flag | 588 int sei_buffering_period_present; ///< Buffering period SEI flag |
580 int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs | 589 int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs |
| 590 |
| 591 //SVQ3 specific fields |
| 592 int halfpel_flag; |
| 593 int thirdpel_flag; |
| 594 int unknown_svq3_flag; |
| 595 int next_slice_index; |
| 596 uint32_t svq3_watermark_key; |
581 }H264Context; | 597 }H264Context; |
582 | 598 |
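The field reordering above follows the new divider comment ("Things below are not used in the MB or more inner code"): members touched in the per-macroblock inner loops stay at the front of H264Context, while parameter-set, POC and reference-management state moves to the back. A generic, hedged sketch of that hot/cold layout idea (types and names here are invented for illustration, not the real struct):

    typedef struct ExampleContext{
        /* "hot": read and written for every macroblock, kept on the first cache lines */
        int16_t mv_cache[2][5*8][2];
        uint8_t non_zero_count_cache[6*8];
        /* ===== members below are not used in the inner MB loops ===== */
        void *sps_buffers[32];   /* "cold": touched only while parsing headers */
        int   frame_num;
    }ExampleContext;
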
583 | 599 |
584 extern const uint8_t ff_h264_chroma_qp[52]; | 600 extern const uint8_t ff_h264_chroma_qp[52]; |
585 | 601 |
586 #if CONFIG_SVQ3_DECODER | 602 #if CONFIG_SVQ3_DECODER |
587 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); | 603 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); |
588 | 604 |
589 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); | 605 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); |
590 #else | 606 #else |
(...skipping 138 matching lines...)
729 }; | 745 }; |
730 | 746 |
731 static av_always_inline uint32_t pack16to32(int a, int b){ | 747 static av_always_inline uint32_t pack16to32(int a, int b){ |
732 #if HAVE_BIGENDIAN | 748 #if HAVE_BIGENDIAN |
733 return (b&0xFFFF) + (a<<16); | 749 return (b&0xFFFF) + (a<<16); |
734 #else | 750 #else |
735 return (a&0xFFFF) + (b<<16); | 751 return (a&0xFFFF) + (b<<16); |
736 #endif | 752 #endif |
737 } | 753 } |
738 | 754 |
| 755 static av_always_inline uint16_t pack8to16(int a, int b){ |
| 756 #if HAVE_BIGENDIAN |
| 757 return (b&0xFF) + (a<<8); |
| 758 #else |
| 759 return (a&0xFF) + (b<<8); |
| 760 #endif |
| 761 } |
| 762 |
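pack16to32() and the new pack8to16() build, in an endianness-aware way, the same bit pattern that two consecutive int16_t (or int8_t) array elements occupy in memory, so a motion-vector pair or two reference indices can be written with one aligned store. A standalone hedged sketch of the round trip (assuming a little-endian host; names invented here):

    #include <stdint.h>
    #include <string.h>
    #include <assert.h>

    #ifndef HAVE_BIGENDIAN
    #define HAVE_BIGENDIAN 0   /* assumption for this standalone example */
    #endif

    static uint32_t pack16to32_example(int a, int b){
    #if HAVE_BIGENDIAN
        return (b&0xFFFF) + (a<<16);
    #else
        return (a&0xFFFF) + (b<<16);
    #endif
    }

    int main(void){
        int16_t mv[2];
        uint32_t packed = pack16to32_example(-3, 7);  /* mx=-3, my=7 */
        memcpy(mv, &packed, sizeof(mv));              /* single 32-bit store */
        assert(mv[0] == -3 && mv[1] == 7);
        return 0;
    }
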
739 /** | 763 /** |
740 * gets the chroma qp. | 764 * gets the chroma qp. |
741 */ | 765 */ |
742 static inline int get_chroma_qp(H264Context *h, int t, int qscale){ | 766 static inline int get_chroma_qp(H264Context *h, int t, int qscale){ |
743 return h->pps.chroma_qp_table[t][qscale]; | 767 return h->pps.chroma_qp_table[t][qscale]; |
744 } | 768 } |
745 | 769 |
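get_chroma_qp() has two table rows because H.264 allows separate Cb and Cr QP offsets in the PPS (chroma_qp_index_offset / second_chroma_qp_index_offset). A hedged usage sketch, mirroring how a caller might refresh both chroma QPs when the macroblock QP changes (helper name invented):

    static void example_update_chroma_qp(H264Context *h, int qscale){
        h->chroma_qp[0] = get_chroma_qp(h, 0, qscale);  /* Cb */
        h->chroma_qp[1] = get_chroma_qp(h, 1, qscale);  /* Cr */
    }
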
746 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); | 770 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); |
747 | 771 |
748 static void fill_decode_caches(H264Context *h, int mb_type){ | 772 static void fill_decode_neighbors(H264Context *h, int mb_type){ |
749 MpegEncContext * const s = &h->s; | 773 MpegEncContext * const s = &h->s; |
750 const int mb_xy= h->mb_xy; | 774 const int mb_xy= h->mb_xy; |
751 int topleft_xy, top_xy, topright_xy, left_xy[2]; | 775 int topleft_xy, top_xy, topright_xy, left_xy[2]; |
752 int topleft_type, top_type, topright_type, left_type[2]; | |
753 const uint8_t * left_block; | |
754 int topleft_partition= -1; | |
755 int i; | |
756 static const uint8_t left_block_options[4][16]={ | 776 static const uint8_t left_block_options[4][16]={ |
757 {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, | 777 {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, |
758 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, | 778 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, |
759 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, | 779 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, |
760 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} | 780 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} |
761 }; | 781 }; |
762 | 782 |
| 783 h->topleft_partition= -1; |
| 784 |
763 top_xy = mb_xy - (s->mb_stride << MB_FIELD); | 785 top_xy = mb_xy - (s->mb_stride << MB_FIELD); |
764 | 786 |
765 /* Wow, what a mess, why didn't they simplify the interlacing & intra | 787 /* Wow, what a mess, why didn't they simplify the interlacing & intra |
766 * stuff, I can't imagine that these complex rules are worth it. */ | 788 * stuff, I can't imagine that these complex rules are worth it. */ |
767 | 789 |
768 topleft_xy = top_xy - 1; | 790 topleft_xy = top_xy - 1; |
769 topright_xy= top_xy + 1; | 791 topright_xy= top_xy + 1; |
770 left_xy[1] = left_xy[0] = mb_xy-1; | 792 left_xy[1] = left_xy[0] = mb_xy-1; |
771 left_block = left_block_options[0]; | 793 h->left_block = left_block_options[0]; |
772 if(FRAME_MBAFF){ | 794 if(FRAME_MBAFF){ |
773 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); | 795 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); |
774 const int curr_mb_field_flag = IS_INTERLACED(mb_type); | 796 const int curr_mb_field_flag = IS_INTERLACED(mb_type); |
775 if(s->mb_y&1){ | 797 if(s->mb_y&1){ |
776 if (left_mb_field_flag != curr_mb_field_flag) { | 798 if (left_mb_field_flag != curr_mb_field_flag) { |
777 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; | 799 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; |
778 if (curr_mb_field_flag) { | 800 if (curr_mb_field_flag) { |
779 left_xy[1] += s->mb_stride; | 801 left_xy[1] += s->mb_stride; |
780 left_block = left_block_options[3]; | 802 h->left_block = left_block_options[3]; |
781 } else { | 803 } else { |
782 topleft_xy += s->mb_stride; | 804 topleft_xy += s->mb_stride; |
783 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition | 805 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition |
784 topleft_partition = 0; | 806 h->topleft_partition = 0; |
785 left_block = left_block_options[1]; | 807 h->left_block = left_block_options[1]; |
786 } | 808 } |
787 } | 809 } |
788 }else{ | 810 }else{ |
789 if(curr_mb_field_flag){ | 811 if(curr_mb_field_flag){ |
790 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); | 812 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); |
791 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1); | 813 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1); |
792 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); | 814 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); |
793 } | 815 } |
794 if (left_mb_field_flag != curr_mb_field_flag) { | 816 if (left_mb_field_flag != curr_mb_field_flag) { |
795 left_xy[1] = left_xy[0] = mb_xy - 1; | |
796 if (curr_mb_field_flag) { | 817 if (curr_mb_field_flag) { |
797 left_xy[1] += s->mb_stride; | 818 left_xy[1] += s->mb_stride; |
798 left_block = left_block_options[3]; | 819 h->left_block = left_block_options[3]; |
799 } else { | 820 } else { |
800 left_block = left_block_options[2]; | 821 h->left_block = left_block_options[2]; |
801 } | 822 } |
802 } | 823 } |
803 } | 824 } |
804 } | 825 } |
805 | 826 |
806 h->top_mb_xy = top_xy; | 827 h->topleft_mb_xy = topleft_xy; |
| 828 h->top_mb_xy = top_xy; |
| 829 h->topright_mb_xy= topright_xy; |
807 h->left_mb_xy[0] = left_xy[0]; | 830 h->left_mb_xy[0] = left_xy[0]; |
808 h->left_mb_xy[1] = left_xy[1]; | 831 h->left_mb_xy[1] = left_xy[1]; |
809 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; | 832 //FIXME do we need all in the context? |
810 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; | |
811 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; | |
812 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; | |
813 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; | |
814 | 833 |
| 834 h->topleft_type = s->current_picture.mb_type[topleft_xy] ; |
| 835 h->top_type = s->current_picture.mb_type[top_xy] ; |
| 836 h->topright_type= s->current_picture.mb_type[topright_xy]; |
| 837 h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ; |
| 838 h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ; |
| 839 |
| 840 if(FMO){ |
| 841 if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; |
| 842 if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; |
| 843 if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_ty
pe[1] = 0; |
| 844 }else{ |
| 845 if(h->slice_table[topleft_xy ] != h->slice_num){ |
| 846 h->topleft_type = 0; |
| 847 if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; |
| 848 if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0; |
| 849 } |
| 850 } |
| 851 if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; |
| 852 } |
| 853 |
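fill_decode_neighbors() above now resolves the neighbouring macroblock positions and types once per macroblock and stores them in the context; fill_decode_caches() below (and fill_filter_caches() further down) only read those stored values. A hedged sketch of the intended call order (wrapper name invented for illustration):

    static void example_decode_mb_setup(H264Context *h, int mb_type){
        fill_decode_neighbors(h, mb_type);  /* sets h->top_mb_xy, h->left_type[], h->left_block, ... */
        fill_decode_caches(h, mb_type);     /* builds the scan8-ordered caches from the stored neighbours */
    }
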
| 854 static void fill_decode_caches(H264Context *h, int mb_type){ |
| 855 MpegEncContext * const s = &h->s; |
| 856 int topleft_xy, top_xy, topright_xy, left_xy[2]; |
| 857 int topleft_type, top_type, topright_type, left_type[2]; |
| 858 const uint8_t * left_block= h->left_block; |
| 859 int i; |
| 860 |
| 861 topleft_xy = h->topleft_mb_xy ; |
| 862 top_xy = h->top_mb_xy ; |
| 863 topright_xy = h->topright_mb_xy; |
| 864 left_xy[0] = h->left_mb_xy[0] ; |
| 865 left_xy[1] = h->left_mb_xy[1] ; |
| 866 topleft_type = h->topleft_type ; |
| 867 top_type = h->top_type ; |
| 868 topright_type= h->topright_type ; |
| 869 left_type[0] = h->left_type[0] ; |
| 870 left_type[1] = h->left_type[1] ; |
| 871 |
| 872 if(!IS_SKIP(mb_type)){ |
815 if(IS_INTRA(mb_type)){ | 873 if(IS_INTRA(mb_type)){ |
816 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; | 874 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; |
817 h->topleft_samples_available= | 875 h->topleft_samples_available= |
818 h->top_samples_available= | 876 h->top_samples_available= |
819 h->left_samples_available= 0xFFFF; | 877 h->left_samples_available= 0xFFFF; |
820 h->topright_samples_available= 0xEEEA; | 878 h->topright_samples_available= 0xEEEA; |
821 | 879 |
822 if(!(top_type & type_mask)){ | 880 if(!(top_type & type_mask)){ |
823 h->topleft_samples_available= 0xB3FF; | 881 h->topleft_samples_available= 0xB3FF; |
824 h->top_samples_available= 0x33FF; | 882 h->top_samples_available= 0x33FF; |
825 h->topright_samples_available= 0x26EA; | 883 h->topright_samples_available= 0x26EA; |
826 } | 884 } |
827 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ | 885 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ |
828 if(IS_INTERLACED(mb_type)){ | 886 if(IS_INTERLACED(mb_type)){ |
829 if(!(left_type[0] & type_mask)){ | 887 if(!(left_type[0] & type_mask)){ |
830 h->topleft_samples_available&= 0xDFFF; | 888 h->topleft_samples_available&= 0xDFFF; |
831 h->left_samples_available&= 0x5FFF; | 889 h->left_samples_available&= 0x5FFF; |
832 } | 890 } |
833 if(!(left_type[1] & type_mask)){ | 891 if(!(left_type[1] & type_mask)){ |
834 h->topleft_samples_available&= 0xFF5F; | 892 h->topleft_samples_available&= 0xFF5F; |
835 h->left_samples_available&= 0xFF5F; | 893 h->left_samples_available&= 0xFF5F; |
836 } | 894 } |
837 }else{ | 895 }else{ |
838 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num | 896 int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride]; |
839 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0; | 897 |
840 assert(left_xy[0] == left_xy[1]); | 898 assert(left_xy[0] == left_xy[1]); |
841 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ | 899 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ |
842 h->topleft_samples_available&= 0xDF5F; | 900 h->topleft_samples_available&= 0xDF5F; |
843 h->left_samples_available&= 0x5F5F; | 901 h->left_samples_available&= 0x5F5F; |
844 } | 902 } |
845 } | 903 } |
846 }else{ | 904 }else{ |
847 if(!(left_type[0] & type_mask)){ | 905 if(!(left_type[0] & type_mask)){ |
848 h->topleft_samples_available&= 0xDF5F; | 906 h->topleft_samples_available&= 0xDF5F; |
849 h->left_samples_available&= 0x5F5F; | 907 h->left_samples_available&= 0x5F5F; |
850 } | 908 } |
851 } | 909 } |
852 | 910 |
853 if(!(topleft_type & type_mask)) | 911 if(!(topleft_type & type_mask)) |
854 h->topleft_samples_available&= 0x7FFF; | 912 h->topleft_samples_available&= 0x7FFF; |
855 | 913 |
856 if(!(topright_type & type_mask)) | 914 if(!(topright_type & type_mask)) |
857 h->topright_samples_available&= 0xFBFF; | 915 h->topright_samples_available&= 0xFBFF; |
858 | 916 |
859 if(IS_INTRA4x4(mb_type)){ | 917 if(IS_INTRA4x4(mb_type)){ |
860 if(IS_INTRA4x4(top_type)){ | 918 if(IS_INTRA4x4(top_type)){ |
861 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; | 919 AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); |
862 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; | |
863 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; | |
864 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; | |
865 }else{ | 920 }else{ |
866 int pred; | |
867 if(!(top_type & type_mask)) | |
868 pred= -1; | |
869 else{ | |
870 pred= 2; | |
871 } | |
872 h->intra4x4_pred_mode_cache[4+8*0]= | 921 h->intra4x4_pred_mode_cache[4+8*0]= |
873 h->intra4x4_pred_mode_cache[5+8*0]= | 922 h->intra4x4_pred_mode_cache[5+8*0]= |
874 h->intra4x4_pred_mode_cache[6+8*0]= | 923 h->intra4x4_pred_mode_cache[6+8*0]= |
875 h->intra4x4_pred_mode_cache[7+8*0]= pred; | 924 h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); |
876 } | 925 } |
877 for(i=0; i<2; i++){ | 926 for(i=0; i<2; i++){ |
878 if(IS_INTRA4x4(left_type[i])){ | 927 if(IS_INTRA4x4(left_type[i])){ |
879 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; | 928 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]]; |
880 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; | 929 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; |
| 930 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; |
881 }else{ | 931 }else{ |
882 int pred; | |
883 if(!(left_type[i] & type_mask)) | |
884 pred= -1; | |
885 else{ | |
886 pred= 2; | |
887 } | |
888 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= | 932 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= |
889 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; | 933 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask); |
890 } | 934 } |
891 } | 935 } |
892 } | 936 } |
893 } | 937 } |
894 | 938 |
895 | 939 |
896 /* | 940 /* |
897 0 . T T. T T T T | 941 0 . T T. T T T T |
898 1 L . .L . . . . | 942 1 L . .L . . . . |
899 2 L . .L . . . . | 943 2 L . .L . . . . |
900 3 . T TL . . . . | 944 3 . T TL . . . . |
901 4 L . .L . . . . | 945 4 L . .L . . . . |
902 5 L . .. . . . . | 946 5 L . .. . . . . |
903 */ | 947 */ |
904 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | 948 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
905 if(top_type){ | 949 if(top_type){ |
906 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; | 950 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
907 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; | 951 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; |
908 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; | 952 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; |
909 | 953 |
910 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; | 954 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; |
911 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; | 955 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; |
912 }else { | 956 }else { |
913 h->non_zero_count_cache[1+8*0]= | 957 h->non_zero_count_cache[1+8*0]= |
914 h->non_zero_count_cache[2+8*0]= | 958 h->non_zero_count_cache[2+8*0]= |
915 | 959 |
916 h->non_zero_count_cache[1+8*3]= | 960 h->non_zero_count_cache[1+8*3]= |
917 h->non_zero_count_cache[2+8*3]= | 961 h->non_zero_count_cache[2+8*3]= |
918 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; | 962 AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040); |
919 } | 963 } |
920 | 964 |
921 for (i=0; i<2; i++) { | 965 for (i=0; i<2; i++) { |
922 if(left_type[i]){ | 966 if(left_type[i]){ |
923 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; | 967 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; |
924 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; | 968 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; |
925 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; | 969 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; |
926 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; | 970 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; |
927 }else{ | 971 }else{ |
928 h->non_zero_count_cache[3+8*1 + 2*8*i]= | 972 h->non_zero_count_cache[3+8*1 + 2*8*i]= |
929 h->non_zero_count_cache[3+8*2 + 2*8*i]= | 973 h->non_zero_count_cache[3+8*2 + 2*8*i]= |
930 h->non_zero_count_cache[0+8*1 + 8*i]= | 974 h->non_zero_count_cache[0+8*1 + 8*i]= |
931 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; | 975 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; |
932 } | 976 } |
933 } | 977 } |
934 | 978 |
935 if( CABAC ) { | 979 if( CABAC ) { |
936 // top_cbp | 980 // top_cbp |
937 if(top_type) { | 981 if(top_type) { |
938 h->top_cbp = h->cbp_table[top_xy]; | 982 h->top_cbp = h->cbp_table[top_xy]; |
939 } else if(IS_INTRA(mb_type)) { | |
940 h->top_cbp = 0x1C0; | |
941 } else { | 983 } else { |
942 h->top_cbp = 0; | 984 h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; |
943 } | 985 } |
944 // left_cbp | 986 // left_cbp |
945 if (left_type[0]) { | 987 if (left_type[0]) { |
946 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; | 988 h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0) |
947 } else if(IS_INTRA(mb_type)) { | 989 | ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2) |
948 h->left_cbp = 0x1C0; | 990 | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2); |
949 } else { | 991 } else { |
950 h->left_cbp = 0; | 992 h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; |
951 } | 993 } |
952 if (left_type[0]) { | 994 } |
953 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1; | |
954 } | |
955 if (left_type[1]) { | |
956 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3; | |
957 } | |
958 } | 995 } |
959 | 996 |
960 #if 1 | 997 #if 1 |
961 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ | 998 if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ |
962 int list; | 999 int list; |
963 for(list=0; list<h->list_count; list++){ | 1000 for(list=0; list<h->list_count; list++){ |
964 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){ | 1001 if(!USES_LIST(mb_type, list)){ |
965 /*if(!h->mv_cache_clean[list]){ | 1002 /*if(!h->mv_cache_clean[list]){ |
966 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? | 1003 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? |
967 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); | 1004 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); |
968 h->mv_cache_clean[list]= 1; | 1005 h->mv_cache_clean[list]= 1; |
969 }*/ | 1006 }*/ |
970 continue; | 1007 continue; |
971 } | 1008 } |
| 1009 assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); |
| 1010 |
972 h->mv_cache_clean[list]= 0; | 1011 h->mv_cache_clean[list]= 0; |
973 | 1012 |
974 if(USES_LIST(top_type, list)){ | 1013 if(USES_LIST(top_type, list)){ |
975 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 1014 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
976 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | |
977 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); | 1015 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); |
978 h->ref_cache[list][scan8[0] + 0 - 1*8]= | 1016 h->ref_cache[list][scan8[0] + 0 - 1*8]= |
979 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; | 1017 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2]; |
980 h->ref_cache[list][scan8[0] + 2 - 1*8]= | 1018 h->ref_cache[list][scan8[0] + 2 - 1*8]= |
981 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; | 1019 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3]; |
982 }else{ | 1020 }else{ |
983 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); | 1021 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
984 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; | 1022 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101); |
985 } | 1023 } |
986 | 1024 |
| 1025 if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ |
987 for(i=0; i<2; i++){ | 1026 for(i=0; i<2; i++){ |
988 int cache_idx = scan8[0] - 1 + i*2*8; | 1027 int cache_idx = scan8[0] - 1 + i*2*8; |
989 if(USES_LIST(left_type[i], list)){ | 1028 if(USES_LIST(left_type[i], list)){ |
990 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; | 1029 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; |
991 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; | 1030 const int b8_xy= 4*left_xy[i] + 1; |
992 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; | 1031 AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]); |
993 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; | 1032 AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]); |
994 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; | 1033 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)]; |
995 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; | 1034 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)]; |
996 }else{ | 1035 }else{ |
997 *(uint32_t*)h->mv_cache [list][cache_idx ]= | 1036 AV_ZERO32(h->mv_cache [list][cache_idx ]); |
998 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; | 1037 AV_ZERO32(h->mv_cache [list][cache_idx+8]); |
999 h->ref_cache[list][cache_idx ]= | 1038 h->ref_cache[list][cache_idx ]= |
1000 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 1039 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1001 } | 1040 } |
1002 } | 1041 } |
1003 | |
1004 if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF) | |
1005 continue; | |
1006 | |
1007 if(USES_LIST(topleft_type, list)){ | |
1008 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride); | |
1009 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride); | |
1010 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | |
1011 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
1012 }else{ | 1042 }else{ |
1013 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; | 1043 if(USES_LIST(left_type[0], list)){ |
1014 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 1044 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
| 1045 const int b8_xy= 4*left_xy[0] + 1; |
| 1046 AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]); |
| 1047 h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)]; |
| 1048 }else{ |
| 1049 AV_ZERO32(h->mv_cache [list][scan8[0] - 1]); |
| 1050 h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
| 1051 } |
1015 } | 1052 } |
1016 | 1053 |
1017 if(USES_LIST(topright_type, list)){ | 1054 if(USES_LIST(topright_type, list)){ |
1018 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; | 1055 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; |
1019 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; | 1056 AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]); |
1020 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | 1057 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2]; |
1021 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
1022 }else{ | 1058 }else{ |
1023 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; | 1059 AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]); |
1024 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 1060 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1025 } | 1061 } |
| 1062 if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){ |
| 1063 if(USES_LIST(topleft_type, list)){ |
| 1064 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); |
| 1065 const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2); |
| 1066 AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]); |
| 1067 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
| 1068 }else{ |
| 1069 AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]); |
| 1070 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
| 1071 } |
| 1072 } |
1026 | 1073 |
1027 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF) | 1074 if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) |
1028 continue; | 1075 continue; |
1029 | 1076 |
1030 h->ref_cache[list][scan8[5 ]+1] = | 1077 if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) { |
1031 h->ref_cache[list][scan8[7 ]+1] = | |
1032 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) | |
1033 h->ref_cache[list][scan8[4 ]] = | 1078 h->ref_cache[list][scan8[4 ]] = |
1034 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; | 1079 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; |
1035 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= | 1080 AV_ZERO32(h->mv_cache [list][scan8[4 ]]); |
1036 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= | 1081 AV_ZERO32(h->mv_cache [list][scan8[12]]); |
1037 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) | |
1038 *(uint32_t*)h->mv_cache [list][scan8[4 ]]= | |
1039 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; | |
1040 | 1082 |
1041 if( CABAC ) { | 1083 if( CABAC ) { |
1042 /* XXX beurk, Load mvd */ | 1084 /* XXX beurk, Load mvd */ |
1043 if(USES_LIST(top_type, list)){ | 1085 if(USES_LIST(top_type, list)){ |
1044 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 1086 const int b_xy= h->mb2br_xy[top_xy]; |
1045 AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); | 1087 AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); |
1046 }else{ | 1088 }else{ |
1047 AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); | 1089 AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]); |
1048 } | 1090 } |
1049 if(USES_LIST(left_type[0], list)){ | 1091 if(USES_LIST(left_type[0], list)){ |
1050 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | 1092 const int b_xy= h->mb2br_xy[left_xy[0]] + 6; |
1051 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; | 1093 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]); |
1052 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; | 1094 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]); |
1053 }else{ | 1095 }else{ |
1054 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= | 1096 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]); |
1055 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; | 1097 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]); |
1056 } | 1098 } |
1057 if(USES_LIST(left_type[1], list)){ | 1099 if(USES_LIST(left_type[1], list)){ |
1058 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; | 1100 const int b_xy= h->mb2br_xy[left_xy[1]] + 6; |
1059 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; | 1101 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]); |
1060 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; | 1102 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]); |
1061 }else{ | 1103 }else{ |
1062 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= | 1104 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]); |
1063 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; | 1105 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]); |
1064 } | 1106 } |
1065 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= | 1107 AV_ZERO16(h->mvd_cache [list][scan8[4 ]]); |
1066 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= | 1108 AV_ZERO16(h->mvd_cache [list][scan8[12]]); |
1067 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) | |
1068 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= | |
1069 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; | |
1070 | |
1071 if(h->slice_type_nos == FF_B_TYPE){ | 1109 if(h->slice_type_nos == FF_B_TYPE){ |
1072 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1); | 1110 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); |
1073 | 1111 |
1074 if(IS_DIRECT(top_type)){ | 1112 if(IS_DIRECT(top_type)){ |
1075 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101; | 1113 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1)); |
1076 }else if(IS_8X8(top_type)){ | 1114 }else if(IS_8X8(top_type)){ |
1077 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; | 1115 int b8_xy = 4*top_xy; |
1078 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; | 1116 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2]; |
1079 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; | 1117 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3]; |
1080 }else{ | 1118 }else{ |
1081 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; | 1119 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1)); |
1082 } | 1120 } |
1083 | 1121 |
1084 if(IS_DIRECT(left_type[0])) | 1122 if(IS_DIRECT(left_type[0])) |
1085 h->direct_cache[scan8[0] - 1 + 0*8]= 1; | 1123 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; |
1086 else if(IS_8X8(left_type[0])) | 1124 else if(IS_8X8(left_type[0])) |
1087 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; | 1125 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)]; |
1088 else | 1126 else |
1089 h->direct_cache[scan8[0] - 1 + 0*8]= 0; | 1127 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1; |
1090 | 1128 |
1091 if(IS_DIRECT(left_type[1])) | 1129 if(IS_DIRECT(left_type[1])) |
1092 h->direct_cache[scan8[0] - 1 + 2*8]= 1; | 1130 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1; |
1093 else if(IS_8X8(left_type[1])) | 1131 else if(IS_8X8(left_type[1])) |
1094 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; | 1132 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)]; |
1095 else | 1133 else |
1096 h->direct_cache[scan8[0] - 1 + 2*8]= 0; | 1134 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1; |
1097 } | 1135 } |
1098 } | 1136 } |
1099 | 1137 } |
1100 if(FRAME_MBAFF){ | 1138 if(FRAME_MBAFF){ |
1101 #define MAP_MVS\ | 1139 #define MAP_MVS\ |
1102 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ | 1140 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ |
1103 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ | 1141 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ |
1104 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ | 1142 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ |
1105 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ | 1143 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ |
1106 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ | 1144 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ |
1107 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ | 1145 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ |
1108 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ | 1146 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ |
1109 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ | 1147 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ |
1110 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ | 1148 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ |
1111 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) | 1149 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) |
1112 if(MB_FIELD){ | 1150 if(MB_FIELD){ |
1113 #define MAP_F2F(idx, mb_type)\ | 1151 #define MAP_F2F(idx, mb_type)\ |
1114 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){
\ | 1152 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){
\ |
1115 h->ref_cache[list][idx] <<= 1;\ | 1153 h->ref_cache[list][idx] <<= 1;\ |
1116 h->mv_cache[list][idx][1] /= 2;\ | 1154 h->mv_cache[list][idx][1] /= 2;\ |
1117 h->mvd_cache[list][idx][1] /= 2;\ | 1155 h->mvd_cache[list][idx][1] >>=1;\ |
1118 } | 1156 } |
1119 MAP_MVS | 1157 MAP_MVS |
1120 #undef MAP_F2F | 1158 #undef MAP_F2F |
1121 }else{ | 1159 }else{ |
1122 #define MAP_F2F(idx, mb_type)\ | 1160 #define MAP_F2F(idx, mb_type)\ |
1123 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ | 1161 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ |
1124 h->ref_cache[list][idx] >>= 1;\ | 1162 h->ref_cache[list][idx] >>= 1;\ |
1125 h->mv_cache[list][idx][1] <<= 1;\ | 1163 h->mv_cache[list][idx][1] <<= 1;\ |
1126 h->mvd_cache[list][idx][1] <<= 1;\ | 1164 h->mvd_cache[list][idx][1] <<= 1;\ |
1127 } | 1165 } |
(...skipping 10 matching lines...)
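The MAP_F2F macros above rescale a neighbour's cached data when its frame/field coding differs from the current macroblock's. A field macroblock addresses half as many picture lines per reference as a frame one, so the MB_FIELD branch doubles the reference index (one frame reference corresponds to two field references) and halves the vertical motion component, while the else branch does the inverse for a field-coded neighbour of a frame-coded macroblock. A minimal sketch of the field-coded case, with a hypothetical helper name (the real code works in place on the caches through the macro):

    /* frame-coded neighbour seen from a field-coded MB (sketch) */
    static inline void frame_to_field(int8_t *ref, int16_t mv[2])
    {
        if (*ref >= 0) {        /* only remap valid references */
            *ref  <<= 1;        /* one frame ref -> two field refs */
            mv[1]  /= 2;        /* frame lines -> field lines */
        }
    }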
1138 | 1176 |
1139 /** | 1177 /** |
1140 * | 1178 * |
1141  * @returns non-zero if the loop filter can be skipped | 1179  * @returns non-zero if the loop filter can be skipped |
1142 */ | 1180 */ |
1143 static int fill_filter_caches(H264Context *h, int mb_type){ | 1181 static int fill_filter_caches(H264Context *h, int mb_type){ |
1144 MpegEncContext * const s = &h->s; | 1182 MpegEncContext * const s = &h->s; |
1145 const int mb_xy= h->mb_xy; | 1183 const int mb_xy= h->mb_xy; |
1146 int top_xy, left_xy[2]; | 1184 int top_xy, left_xy[2]; |
1147 int top_type, left_type[2]; | 1185 int top_type, left_type[2]; |
1148 int i; | |
1149 | 1186 |
1150 top_xy = mb_xy - (s->mb_stride << MB_FIELD); | 1187 top_xy = mb_xy - (s->mb_stride << MB_FIELD); |
1151 | 1188 |
1152 //FIXME deblocking could skip the intra and nnz parts. | 1189 //FIXME deblocking could skip the intra and nnz parts. |
1153 | 1190 |
1154 /* Wow, what a mess, why didn't they simplify the interlacing & intra | 1191 /* Wow, what a mess, why didn't they simplify the interlacing & intra |
1155 * stuff, I can't imagine that these complex rules are worth it. */ | 1192 * stuff, I can't imagine that these complex rules are worth it. */ |
1156 | 1193 |
1157 left_xy[1] = left_xy[0] = mb_xy-1; | 1194 left_xy[1] = left_xy[0] = mb_xy-1; |
1158 if(FRAME_MBAFF){ | 1195 if(FRAME_MBAFF){ |
(...skipping 25 matching lines...)
1184 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]
] + 1)>>1) <= qp_thresh) | 1221 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]
] + 1)>>1) <= qp_thresh) |
1185 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy
] + 1)>>1) <= qp_thresh)){ | 1222 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy
] + 1)>>1) <= qp_thresh)){ |
1186 if(!FRAME_MBAFF) | 1223 if(!FRAME_MBAFF) |
1187 return 1; | 1224 return 1; |
1188 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_
table[left_xy[1] ] + 1)>>1) <= qp_thresh) | 1225 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_
table[left_xy[1] ] + 1)>>1) <= qp_thresh) |
1189 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_
table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) | 1226 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_
table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) |
1190 return 1; | 1227 return 1; |
1191 } | 1228 } |
1192 } | 1229 } |
1193 | 1230 |
| 1231 top_type = s->current_picture.mb_type[top_xy] ; |
| 1232 left_type[0] = s->current_picture.mb_type[left_xy[0]]; |
| 1233 left_type[1] = s->current_picture.mb_type[left_xy[1]]; |
1194 if(h->deblocking_filter == 2){ | 1234 if(h->deblocking_filter == 2){ |
1195 h->top_type = top_type = h->slice_table[top_xy ] == h->slice_
num ? s->current_picture.mb_type[top_xy] : 0; | 1235 if(h->slice_table[top_xy ] != h->slice_num) top_type= 0; |
1196 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_
num ? s->current_picture.mb_type[left_xy[0]] : 0; | 1236 if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[
1]= 0; |
1197 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_
num ? s->current_picture.mb_type[left_xy[1]] : 0; | |
1198 }else{ | 1237 }else{ |
1199 h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s
->current_picture.mb_type[top_xy] : 0; | 1238 if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0; |
1200 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s
->current_picture.mb_type[left_xy[0]] : 0; | 1239 if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0; |
1201 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s
->current_picture.mb_type[left_xy[1]] : 0; | |
1202 } | 1240 } |
| 1241 h->top_type = top_type ; |
| 1242 h->left_type[0]= left_type[0]; |
| 1243 h->left_type[1]= left_type[1]; |
| 1244 |
1203 if(IS_INTRA(mb_type)) | 1245 if(IS_INTRA(mb_type)) |
1204 return 0; | 1246 return 0; |
1205 | 1247 |
1206 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); | 1248 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); |
1207 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); | 1249 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); |
1208 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_cou
nt[mb_xy][16]); | 1250 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); |
1209 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_cou
nt[mb_xy][20]); | 1251 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); |
1210 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); | 1252 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); |
1211 | 1253 |
1212 h->cbp= h->cbp_table[mb_xy]; | 1254 h->cbp= h->cbp_table[mb_xy]; |
1213 | 1255 |
1214 { | 1256 { |
1215 int list; | 1257 int list; |
1216 for(list=0; list<h->list_count; list++){ | 1258 for(list=0; list<h->list_count; list++){ |
1217 int8_t *ref; | 1259 int8_t *ref; |
1218 int y, b_stride; | 1260 int y, b_stride; |
1219 int16_t (*mv_dst)[2]; | 1261 int16_t (*mv_dst)[2]; |
1220 int16_t (*mv_src)[2]; | 1262 int16_t (*mv_src)[2]; |
1221 | 1263 |
1222 if(!USES_LIST(mb_type, list)){ | 1264 if(!USES_LIST(mb_type, list)){ |
1223 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to3
2(0,0), 4); | 1265 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to3
2(0,0), 4); |
1224 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | 1266 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*
0x01010101u); |
1225 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = | 1267 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*
0x01010101u); |
1226 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | 1268 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*
0x01010101u); |
1227 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0x
FF)*0x01010101; | 1269 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*
0x01010101u); |
1228 continue; | 1270 continue; |
1229 } | 1271 } |
1230 | 1272 |
1231 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; | 1273 ref = &s->current_picture.ref_index[list][4*mb_xy]; |
1232 { | 1274 { |
1233 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0
] + (MB_MBAFF ? 20 : 2); | 1275 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0
] + (MB_MBAFF ? 20 : 2); |
1234 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | 1276 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[lis
t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1235 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm
[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | 1277 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[lis
t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1236 ref += h->b8_stride; | 1278 ref += 2; |
1237 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | 1279 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[lis
t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1238 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm
[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | 1280 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[lis
t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1239 } | 1281 } |
1240 | 1282 |
1241 b_stride = h->b_stride; | 1283 b_stride = h->b_stride; |
1242 mv_dst = &h->mv_cache[list][scan8[0]]; | 1284 mv_dst = &h->mv_cache[list][scan8[0]]; |
1243 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_
y*b_stride]; | 1285 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_
y*b_stride]; |
1244 for(y=0; y<4; y++){ | 1286 for(y=0; y<4; y++){ |
1245 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); | 1287 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); |
1246 } | 1288 } |
1247 | 1289 |
1248 } | 1290 } |
1249 } | 1291 } |
1250 | 1292 |
1251 | 1293 |
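A note on the ref_cache fill a few lines up: the value handed to AV_WN32A() packs the two 8x8 reference indices of one macroblock row into all four bytes of a 32-bit word, so a single aligned store covers four 4x4 cache entries. A little-endian sketch of the arithmetic (pack16to32() produces this layout on little-endian hosts; the helper also has a big-endian form, so treat this as an illustration rather than the exact expansion):

    #include <stdint.h>

    /* (pack16to32(a, b) & 0x00FF00FF) * 0x0101, spelled out:
     *   pack16to32(a, b) -> 0x00bb00aa   (two 8-bit refs in 16-bit halves)
     *   & 0x00FF00FF     -> keeps one byte per half
     *   * 0x0101         -> 0xbbbbaaaa   (each byte duplicated next to itself)
     * Stored little-endian this puts ref a in the two left 4x4 columns and
     * ref b in the two right ones. */
    static inline uint32_t splat_two_refs(uint8_t a, uint8_t b)
    {
        uint32_t packed = (uint32_t)a | ((uint32_t)b << 16);
        return (packed & 0x00FF00FF) * 0x0101;
    }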
1252 /* | 1294 /* |
1253 0 . T T. T T T T | 1295 0 . T T. T T T T |
1254 1 L . .L . . . . | 1296 1 L . .L . . . . |
1255 2 L . .L . . . . | 1297 2 L . .L . . . . |
1256 3 . T TL . . . . | 1298 3 . T TL . . . . |
1257 4 L . .L . . . . | 1299 4 L . .L . . . . |
1258 5 L . .. . . . . | 1300 5 L . .. . . . . |
1259 */ | 1301 */ |
1260 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a t
ypo in the spec) | 1302 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a t
ypo in the spec) |
1261 if(top_type){ | 1303 if(top_type){ |
1262 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_co
unt[top_xy][4+3*8]; | 1304 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+
3*8]); |
1263 } | 1305 } |
1264 | 1306 |
1265 if(left_type[0]){ | 1307 if(left_type[0]){ |
1266 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; | 1308 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; |
1267 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; | 1309 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; |
1268 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; | 1310 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; |
1269 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; | 1311 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; |
1270 } | 1312 } |
1271 | 1313 |
1272 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from w
hat the loop filter needs | 1314 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from w
hat the loop filter needs |
(...skipping 26 matching lines...)
1299 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[
1+12]]= | 1341 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[
1+12]]= |
1300 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[
3+12]]= h->cbp & 8; | 1342 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[
3+12]]= h->cbp & 8; |
1301 } | 1343 } |
1302 } | 1344 } |
1303 | 1345 |
1304 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ | 1346 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ |
1305 int list; | 1347 int list; |
1306 for(list=0; list<h->list_count; list++){ | 1348 for(list=0; list<h->list_count; list++){ |
1307 if(USES_LIST(top_type, list)){ | 1349 if(USES_LIST(top_type, list)){ |
1308 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 1350 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
1309 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | 1351 const int b8_xy= 4*top_xy + 2; |
1310 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLI
CES-1) ][0] + (MB_MBAFF ? 20 : 2); | 1352 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLI
CES-1) ][0] + (MB_MBAFF ? 20 : 2); |
1311 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_pic
ture.motion_val[list][b_xy + 0]); | 1353 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_pic
ture.motion_val[list][b_xy + 0]); |
1312 h->ref_cache[list][scan8[0] + 0 - 1*8]= | 1354 h->ref_cache[list][scan8[0] + 0 - 1*8]= |
1313 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current
_picture.ref_index[list][b8_xy + 0]]; | 1355 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current
_picture.ref_index[list][b8_xy + 0]]; |
1314 h->ref_cache[list][scan8[0] + 2 - 1*8]= | 1356 h->ref_cache[list][scan8[0] + 2 - 1*8]= |
1315 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current
_picture.ref_index[list][b8_xy + 1]]; | 1357 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current
_picture.ref_index[list][b8_xy + 1]]; |
1316 }else{ | 1358 }else{ |
1317 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); | 1359 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
1318 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_
USED)&0xFF)*0x01010101; | 1360 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USE
D)&0xFF)*0x01010101u); |
1319 } | 1361 } |
1320 | 1362 |
1321 if(!IS_INTERLACED(mb_type^left_type[0])){ | 1363 if(!IS_INTERLACED(mb_type^left_type[0])){ |
1322 if(USES_LIST(left_type[0], list)){ | 1364 if(USES_LIST(left_type[0], list)){ |
1323 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | 1365 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
1324 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; | 1366 const int b8_xy= 4*left_xy[0] + 1; |
1325 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&
(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | 1367 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&
(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
1326 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t
*)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; | 1368 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_p
icture.motion_val[list][b_xy + h->b_stride*0]); |
1327 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t
*)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; | 1369 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_p
icture.motion_val[list][b_xy + h->b_stride*1]); |
1328 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t
*)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; | 1370 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_p
icture.motion_val[list][b_xy + h->b_stride*2]); |
1329 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t
*)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; | 1371 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_p
icture.motion_val[list][b_xy + h->b_stride*3]); |
1330 h->ref_cache[list][scan8[0] - 1 + 0 ]= | 1372 h->ref_cache[list][scan8[0] - 1 + 0 ]= |
1331 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->curr
ent_picture.ref_index[list][b8_xy + h->b8_stride*0]]; | 1373 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->curr
ent_picture.ref_index[list][b8_xy + 2*0]]; |
1332 h->ref_cache[list][scan8[0] - 1 +16 ]= | 1374 h->ref_cache[list][scan8[0] - 1 +16 ]= |
1333 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->curr
ent_picture.ref_index[list][b8_xy + h->b8_stride*1]]; | 1375 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->curr
ent_picture.ref_index[list][b8_xy + 2*1]]; |
1334 }else{ | 1376 }else{ |
1335 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= | 1377 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); |
1336 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= | 1378 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); |
1337 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= | 1379 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); |
1338 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; | 1380 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); |
1339 h->ref_cache[list][scan8[0] - 1 + 0 ]= | 1381 h->ref_cache[list][scan8[0] - 1 + 0 ]= |
1340 h->ref_cache[list][scan8[0] - 1 + 8 ]= | 1382 h->ref_cache[list][scan8[0] - 1 + 8 ]= |
1341 h->ref_cache[list][scan8[0] - 1 + 16 ]= | 1383 h->ref_cache[list][scan8[0] - 1 + 16 ]= |
1342 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; | 1384 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; |
1343 } | 1385 } |
1344 } | 1386 } |
1345 } | 1387 } |
1346 } | 1388 } |
1347 | 1389 |
1348 return 0; | 1390 return 0; |
(...skipping 12 matching lines...)
1361 | 1403 |
1362 if(min<0) return DC_PRED; | 1404 if(min<0) return DC_PRED; |
1363 else return min; | 1405 else return min; |
1364 } | 1406 } |
1365 | 1407 |
1366 static inline void write_back_non_zero_count(H264Context *h){ | 1408 static inline void write_back_non_zero_count(H264Context *h){ |
1367 const int mb_xy= h->mb_xy; | 1409 const int mb_xy= h->mb_xy; |
1368 | 1410 |
1369 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); | 1411 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); |
1370 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); | 1412 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); |
1371 *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_coun
t_cache[0+8*5]); | 1413 AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]); |
1372 *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_coun
t_cache[4+8*3]); | 1414 AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]); |
1373 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); | 1415 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); |
1374 } | 1416 } |
1375 | 1417 |
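Throughout write_back_non_zero_count() (and most hunks in this patch) raw *(uint32_t*) pointer casts are replaced by the AV_COPY*/AV_WN*A helpers from libavutil/intreadwrite.h, which express the same aligned loads and stores without the type-punning casts. Simplified stand-ins for the two 32-bit aligned forms, for illustration only (the real macros select per-architecture implementations):

    #include <stdint.h>
    #include <string.h>

    static inline void sketch_AV_COPY32(void *d, const void *s)
    {
        memcpy(d, s, 4);            /* copy 4 bytes, no aliasing cast */
    }

    static inline void sketch_AV_WN32A(void *p, uint32_t v)
    {
        memcpy(p, &v, 4);           /* native-endian 32-bit store */
    }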
1376 static inline void write_back_motion(H264Context *h, int mb_type){ | 1418 static inline void write_back_motion(H264Context *h, int mb_type){ |
1377 MpegEncContext * const s = &h->s; | 1419 MpegEncContext * const s = &h->s; |
1378 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; | 1420 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy |
1379 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; | 1421 const int b8_xy= 4*h->mb_xy; |
1380 int list; | 1422 int list; |
1381 | 1423 |
1382 if(!USES_LIST(mb_type, 0)) | 1424 if(!USES_LIST(mb_type, 0)) |
1383 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stri
de, (uint8_t)LIST_NOT_USED, 1); | 1425 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_
t)LIST_NOT_USED, 1); |
1384 | 1426 |
1385 for(list=0; list<h->list_count; list++){ | 1427 for(list=0; list<h->list_count; list++){ |
1386 int y, b_stride; | 1428 int y, b_stride; |
1387 int16_t (*mv_dst)[2]; | 1429 int16_t (*mv_dst)[2]; |
1388 int16_t (*mv_src)[2]; | 1430 int16_t (*mv_src)[2]; |
1389 | 1431 |
1390 if(!USES_LIST(mb_type, list)) | 1432 if(!USES_LIST(mb_type, list)) |
1391 continue; | 1433 continue; |
1392 | 1434 |
1393 b_stride = h->b_stride; | 1435 b_stride = h->b_stride; |
1394 mv_dst = &s->current_picture.motion_val[list][b_xy]; | 1436 mv_dst = &s->current_picture.motion_val[list][b_xy]; |
1395 mv_src = &h->mv_cache[list][scan8[0]]; | 1437 mv_src = &h->mv_cache[list][scan8[0]]; |
1396 for(y=0; y<4; y++){ | 1438 for(y=0; y<4; y++){ |
1397 AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); | 1439 AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); |
1398 } | 1440 } |
1399 if( CABAC ) { | 1441 if( CABAC ) { |
1400 int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; | 1442 uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb
2br_xy[h->mb_xy]]; |
1401 int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; | 1443 uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; |
1402 if(IS_SKIP(mb_type)) | 1444 if(IS_SKIP(mb_type)) |
1403 fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4); | 1445 AV_ZERO128(mvd_dst); |
1404 else | 1446 else{ |
1405 for(y=0; y<4; y++){ | 1447 AV_COPY64(mvd_dst, mvd_src + 8*3); |
1406 AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y); | 1448 AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); |
| 1449 AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); |
| 1450 AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); |
1407 } | 1451 } |
1408 } | 1452 } |
1409 | 1453 |
1410 { | 1454 { |
1411 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; | 1455 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; |
1412 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; | 1456 ref_index[0+0*2]= h->ref_cache[list][scan8[0]]; |
1413 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; | 1457 ref_index[1+0*2]= h->ref_cache[list][scan8[4]]; |
1414 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; | 1458 ref_index[0+1*2]= h->ref_cache[list][scan8[8]]; |
1415 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; | 1459 ref_index[1+1*2]= h->ref_cache[list][scan8[12]]; |
1416 } | 1460 } |
1417 } | 1461 } |
1418 | 1462 |
1419 if(h->slice_type_nos == FF_B_TYPE && CABAC){ | 1463 if(h->slice_type_nos == FF_B_TYPE && CABAC){ |
1420 if(IS_8X8(mb_type)){ | 1464 if(IS_8X8(mb_type)){ |
1421 uint8_t *direct_table = &h->direct_table[b8_xy]; | 1465 uint8_t *direct_table = &h->direct_table[4*h->mb_xy]; |
1422 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 :
0; | 1466 direct_table[1] = h->sub_mb_type[1]>>1; |
1423 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 :
0; | 1467 direct_table[2] = h->sub_mb_type[2]>>1; |
1424 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 :
0; | 1468 direct_table[3] = h->sub_mb_type[3]>>1; |
1425 } | 1469 } |
1426 } | 1470 } |
1427 } | 1471 } |
1428 | 1472 |
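The write_back_motion() changes above (and the matching neighbour loads earlier in the file) drop the picture-wide b8_stride addressing for ref_index and direct_table in favour of four consecutive entries per macroblock with a row stride of 2. A sketch of the indexing that layout implies, using a hypothetical helper name:

    /* per-MB 2x2 layout: entry 0 = top-left 8x8, 1 = top-right,
     * 2 = bottom-left, 3 = bottom-right */
    static inline int b8_index(int mb_xy, int x8, int y8)
    {
        return 4 * mb_xy + 2 * y8 + x8;
    }

This is why the top neighbour above is read at 4*top_xy + 2 and + 3 and the left neighbour at 4*left_xy + 1 and + 3. The mvd_table write in the same function also appears to keep only the bottom row and right column of the per-MB mvd values, i.e. just the entries that later macroblocks consult as neighbours.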
1429 static inline int get_dct8x8_allowed(H264Context *h){ | 1473 static inline int get_dct8x8_allowed(H264Context *h){ |
1430 if(h->sps.direct_8x8_inference_flag) | 1474 if(h->sps.direct_8x8_inference_flag) |
1431 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYP
E_8x8 )*0x0001000100010001ULL)); | 1475 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_
8x8 )*0x0001000100010001ULL)); |
1432 else | 1476 else |
1433 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYP
E_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); | 1477 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_
8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); |
1434 } | |
1435 | |
1436 static void predict_field_decoding_flag(H264Context *h){ | |
1437 MpegEncContext * const s = &h->s; | |
1438 const int mb_xy= h->mb_xy; | |
1439 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) | |
1440 ? s->current_picture.mb_type[mb_xy-1] | |
1441 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) | |
1442 ? s->current_picture.mb_type[mb_xy-s->mb_stride] | |
1443 : 0; | |
1444 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; | |
1445 } | 1478 } |
1446 | 1479 |
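get_dct8x8_allowed() checks all four sub_mb_type entries with a single 64-bit test: multiplying the 16-bit partition mask by 0x0001000100010001ULL replicates it into each 16-bit lane of the packed read, so one AND answers whether any sub-block uses one of those partitions. A standalone sketch of the same trick (it assumes four 16-bit entries, as the packed read implies):

    #include <stdint.h>
    #include <string.h>

    static int any_sub_type_has(const uint16_t sub_mb_type[4], uint16_t mask)
    {
        uint64_t packed;
        memcpy(&packed, sub_mb_type, sizeof(packed));  /* the bytes AV_RN64A reads */
        return (packed & (uint64_t)mask * 0x0001000100010001ULL) != 0;
    }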
1447 /** | 1480 /** |
1448 * decodes a P_SKIP or B_SKIP macroblock | 1481 * decodes a P_SKIP or B_SKIP macroblock |
1449 */ | 1482 */ |
1450 static void decode_mb_skip(H264Context *h){ | 1483 static void decode_mb_skip(H264Context *h){ |
1451 MpegEncContext * const s = &h->s; | 1484 MpegEncContext * const s = &h->s; |
1452 const int mb_xy= h->mb_xy; | 1485 const int mb_xy= h->mb_xy; |
1453 int mb_type=0; | 1486 int mb_type=0; |
1454 | 1487 |
1455 memset(h->non_zero_count[mb_xy], 0, 32); | 1488 memset(h->non_zero_count[mb_xy], 0, 32); |
1456 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui | 1489 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui |
1457 | 1490 |
1458 if(MB_FIELD) | 1491 if(MB_FIELD) |
1459 mb_type|= MB_TYPE_INTERLACED; | 1492 mb_type|= MB_TYPE_INTERLACED; |
1460 | 1493 |
1461 if( h->slice_type_nos == FF_B_TYPE ) | 1494 if( h->slice_type_nos == FF_B_TYPE ) |
1462 { | 1495 { |
1463 // just for fill_caches. pred_direct_motion will set the real mb_type | 1496 // just for fill_caches. pred_direct_motion will set the real mb_type |
1464 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; | 1497 mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; |
1465 | 1498 if(h->direct_spatial_mv_pred){ |
| 1499 fill_decode_neighbors(h, mb_type); |
1466 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no
t ... | 1500 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no
t ... |
| 1501 } |
1467 ff_h264_pred_direct_motion(h, &mb_type); | 1502 ff_h264_pred_direct_motion(h, &mb_type); |
1468 mb_type|= MB_TYPE_SKIP; | 1503 mb_type|= MB_TYPE_SKIP; |
1469 } | 1504 } |
1470 else | 1505 else |
1471 { | 1506 { |
1472 int mx, my; | 1507 int mx, my; |
1473 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; | 1508 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; |
1474 | 1509 |
| 1510 fill_decode_neighbors(h, mb_type); |
1475 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no
t ... | 1511 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no
t ... |
1476 pred_pskip_motion(h, &mx, &my); | 1512 pred_pskip_motion(h, &mx, &my); |
1477 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); | 1513 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); |
1478 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4
); | 1514 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4
); |
1479 } | 1515 } |
1480 | 1516 |
1481 write_back_motion(h, mb_type); | 1517 write_back_motion(h, mb_type); |
1482 s->current_picture.mb_type[mb_xy]= mb_type; | 1518 s->current_picture.mb_type[mb_xy]= mb_type; |
1483 s->current_picture.qscale_table[mb_xy]= s->qscale; | 1519 s->current_picture.qscale_table[mb_xy]= s->qscale; |
1484 h->slice_table[ mb_xy ]= h->slice_num; | 1520 h->slice_table[ mb_xy ]= h->slice_num; |
1485 h->prev_mb_skipped= 1; | 1521 h->prev_mb_skipped= 1; |
1486 } | 1522 } |
1487 | 1523 |
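In the P-skip path above, fill_rectangle() splats one 32-bit value over the whole 4x4 mv_cache area, so the predicted (mx, my) pair is first bundled into a single word. A little-endian sketch of that packing (the pack16to32() helper used here also has a big-endian form):

    #include <stdint.h>

    static inline uint32_t sketch_pack16to32(int mx, int my)
    {
        return ((uint32_t)mx & 0xFFFF) | ((uint32_t)my << 16);
    }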
1488 #include "h264_mvpred.h" //For pred_pskip_motion() | 1524 #include "h264_mvpred.h" //For pred_pskip_motion() |
1489 | 1525 |
1490 #endif /* AVCODEC_H264_H */ | 1526 #endif /* AVCODEC_H264_H */ |