Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Side by Side Diff: patched-ffmpeg-mt/libavcodec/h264.h

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: '' Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 * 4 *
5 * This file is part of FFmpeg. 5 * This file is part of FFmpeg.
6 * 6 *
7 * FFmpeg is free software; you can redistribute it and/or 7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public 8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either 9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version. 10 * version 2.1 of the License, or (at your option) any later version.
(...skipping 10 matching lines...) Expand all
21 21
22 /** 22 /**
23 * @file libavcodec/h264.h 23 * @file libavcodec/h264.h
24 * H.264 / AVC / MPEG4 part10 codec. 24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at> 25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */ 26 */
27 27
28 #ifndef AVCODEC_H264_H 28 #ifndef AVCODEC_H264_H
29 #define AVCODEC_H264_H 29 #define AVCODEC_H264_H
30 30
31 #include "libavutil/intreadwrite.h"
31 #include "dsputil.h" 32 #include "dsputil.h"
32 #include "cabac.h" 33 #include "cabac.h"
33 #include "mpegvideo.h" 34 #include "mpegvideo.h"
34 #include "h264pred.h" 35 #include "h264pred.h"
35 #include "rectangle.h" 36 #include "rectangle.h"
36 37
37 #define interlaced_dct interlaced_dct_is_a_bad_name 38 #define interlaced_dct interlaced_dct_is_a_bad_name
38 #define mb_intra mb_intra_is_not_initialized_see_mb_type 39 #define mb_intra mb_intra_is_not_initialized_see_mb_type
39 40
40 #define LUMA_DC_BLOCK_INDEX 25 41 #define LUMA_DC_BLOCK_INDEX 25
(...skipping 12 matching lines...) Expand all
53 #define MAX_MMCO_COUNT 66 54 #define MAX_MMCO_COUNT 66
54 55
55 #define MAX_DELAYED_PIC_COUNT 16 56 #define MAX_DELAYED_PIC_COUNT 16
56 57
57 /* Compiling in interlaced support reduces the speed 58 /* Compiling in interlaced support reduces the speed
58 * of progressive decoding by about 2%. */ 59 * of progressive decoding by about 2%. */
59 #define ALLOW_INTERLACE 60 #define ALLOW_INTERLACE
60 61
61 #define ALLOW_NOCHROMA 62 #define ALLOW_NOCHROMA
62 63
64 #define FMO 0
65
63 /** 66 /**
64 * The maximum number of slices supported by the decoder. 67 * The maximum number of slices supported by the decoder.
65 * must be a power of 2 68 * must be a power of 2
66 */ 69 */
67 #define MAX_SLICES 16 70 #define MAX_SLICES 16
68 71
69 #ifdef ALLOW_INTERLACE 72 #ifdef ALLOW_INTERLACE
70 #define MB_MBAFF h->mb_mbaff 73 #define MB_MBAFF h->mb_mbaff
71 #define MB_FIELD h->mb_field_decoding_flag 74 #define MB_FIELD h->mb_field_decoding_flag
72 #define FRAME_MBAFF h->mb_aff_frame 75 #define FRAME_MBAFF h->mb_aff_frame
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after
252 MMCOOpcode opcode; 255 MMCOOpcode opcode;
253 int short_pic_num; ///< pic_num without wrapping (pic_num & max_pic_num) 256 int short_pic_num; ///< pic_num without wrapping (pic_num & max_pic_num)
254 int long_arg; ///< index, pic_num, or num long refs depending on opcode 257 int long_arg; ///< index, pic_num, or num long refs depending on opcode
255 } MMCO; 258 } MMCO;
256 259
257 /** 260 /**
258 * H264Context 261 * H264Context
259 */ 262 */
260 typedef struct H264Context{ 263 typedef struct H264Context{
261 MpegEncContext s; 264 MpegEncContext s;
262 int nal_ref_idc;
263 int nal_unit_type;
264 uint8_t *rbsp_buffer[2];
265 unsigned int rbsp_buffer_size[2];
266
267 /**
268 * Used to parse AVC variant of h264
269 */
270 int is_avc; ///< this flag is != 0 if codec is avc1
271 int got_avcC; ///< flag used to parse avcC data only once
272 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
273
274 int chroma_qp[2]; //QPc 265 int chroma_qp[2]; //QPc
275 266
276 int qp_thresh; ///< QP threshold to skip loopfilter 267 int qp_thresh; ///< QP threshold to skip loopfilter
277 268
278 int prev_mb_skipped; 269 int prev_mb_skipped;
279 int next_mb_skipped; 270 int next_mb_skipped;
280 271
281 //prediction stuff 272 //prediction stuff
282 int chroma_pred_mode; 273 int chroma_pred_mode;
283 int intra16x16_pred_mode; 274 int intra16x16_pred_mode;
284 275
276 int topleft_mb_xy;
285 int top_mb_xy; 277 int top_mb_xy;
278 int topright_mb_xy;
286 int left_mb_xy[2]; 279 int left_mb_xy[2];
287 280
281 int topleft_type;
288 int top_type; 282 int top_type;
283 int topright_type;
289 int left_type[2]; 284 int left_type[2];
290 285
286 const uint8_t * left_block;
287 int topleft_partition;
288
291 int8_t intra4x4_pred_mode_cache[5*8]; 289 int8_t intra4x4_pred_mode_cache[5*8];
292 int8_t (*intra4x4_pred_mode)[8]; 290 int8_t (*intra4x4_pred_mode);
293 H264PredContext hpc; 291 H264PredContext hpc;
294 unsigned int topleft_samples_available; 292 unsigned int topleft_samples_available;
295 unsigned int top_samples_available; 293 unsigned int top_samples_available;
296 unsigned int topright_samples_available; 294 unsigned int topright_samples_available;
297 unsigned int left_samples_available; 295 unsigned int left_samples_available;
298 uint8_t (*top_borders[2])[16+2*8]; 296 uint8_t (*top_borders[2])[16+2*8];
299 uint8_t left_border[2*(17+2*9)];
300 297
301 /** 298 /**
302 * non zero coeff count cache. 299 * non zero coeff count cache.
303 * is 64 if not available. 300 * is 64 if not available.
304 */ 301 */
305 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8]; 302 DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8];
306 303
307 /* 304 /*
308 .UU.YYYY 305 .UU.YYYY
309 .UU.YYYY 306 .UU.YYYY
310 .vv.YYYY 307 .vv.YYYY
311 .VV.YYYY 308 .VV.YYYY
312 */ 309 */
313 uint8_t (*non_zero_count)[32]; 310 uint8_t (*non_zero_count)[32];
314 311
315 /** 312 /**
316 * Motion vector cache. 313 * Motion vector cache.
317 */ 314 */
318 DECLARE_ALIGNED_16(int16_t, mv_cache)[2][5*8][2]; 315 DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2];
319 DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8]; 316 DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8];
320 #define LIST_NOT_USED -1 //FIXME rename? 317 #define LIST_NOT_USED -1 //FIXME rename?
321 #define PART_NOT_AVAILABLE -2 318 #define PART_NOT_AVAILABLE -2
322 319
323 /** 320 /**
324 * is 1 if the specific list MV&references are set to 0,0,-2. 321 * is 1 if the specific list MV&references are set to 0,0,-2.
325 */ 322 */
326 int mv_cache_clean[2]; 323 int mv_cache_clean[2];
327 324
328 /** 325 /**
329 * number of neighbors (top and/or left) that used 8x8 dct 326 * number of neighbors (top and/or left) that used 8x8 dct
330 */ 327 */
331 int neighbor_transform_size; 328 int neighbor_transform_size;
332 329
333 /** 330 /**
334 * block_offset[ 0..23] for frame macroblocks 331 * block_offset[ 0..23] for frame macroblocks
335 * block_offset[24..47] for field macroblocks 332 * block_offset[24..47] for field macroblocks
336 */ 333 */
337 int block_offset[2*(16+8)]; 334 int block_offset[2*(16+8)];
338 335
339 uint32_t *mb2b_xy; //FIXME are these 4 a good idea? 336 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
340 uint32_t *mb2b8_xy; 337 uint32_t *mb2br_xy;
341 int b_stride; //FIXME use s->b4_stride 338 int b_stride; //FIXME use s->b4_stride
342 int b8_stride;
343 339
344 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff 340 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
345 int mb_uvlinesize; 341 int mb_uvlinesize;
346 342
347 int emu_edge_width; 343 int emu_edge_width;
348 int emu_edge_height; 344 int emu_edge_height;
349 345
350 int halfpel_flag;
351 int thirdpel_flag;
352
353 int unknown_svq3_flag;
354 int next_slice_index;
355
356 SPS *sps_buffers[MAX_SPS_COUNT];
357 SPS sps; ///< current sps 346 SPS sps; ///< current sps
358 347
359 PPS *pps_buffers[MAX_PPS_COUNT];
360 /** 348 /**
361 * current pps 349 * current pps
362 */ 350 */
363 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? 351 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
364 352
365 uint32_t dequant4_buffer[6][52][16]; 353 uint32_t dequant4_buffer[6][52][16]; //FIXME should these be moved down?
366 uint32_t dequant8_buffer[2][52][64]; 354 uint32_t dequant8_buffer[2][52][64];
367 uint32_t (*dequant4_coeff[6])[16]; 355 uint32_t (*dequant4_coeff[6])[16];
368 uint32_t (*dequant8_coeff[2])[64]; 356 uint32_t (*dequant8_coeff[2])[64];
369 int dequant_coeff_pps; ///< reinit tables when pps changes
370 357
371 int slice_num; 358 int slice_num;
372 uint16_t *slice_table_base;
373 uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 359 uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
374 int slice_type; 360 int slice_type;
375 int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) 361 int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P)
376 int slice_type_fixed; 362 int slice_type_fixed;
377 363
378 //interlacing specific flags 364 //interlacing specific flags
379 int mb_aff_frame; 365 int mb_aff_frame;
380 int mb_field_decoding_flag; 366 int mb_field_decoding_flag;
381 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag 367 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
382 368
383 DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4]; 369 DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];
384
385 //POC stuff
386 int poc_lsb;
387 int poc_msb;
388 int delta_poc_bottom;
389 int delta_poc[2];
390 int frame_num;
391 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
392 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
393 int frame_num_offset; ///< for POC type 2
394 int prev_frame_num_offset; ///< for POC type 2
395 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
396
397 /**
398 * frame_num for frames or 2*frame_num+1 for field pics.
399 */
400 int curr_pic_num;
401
402 /**
403 * max_frame_num or 2*max_frame_num for field pics.
404 */
405 int max_pic_num;
406 370
407 //Weighted pred stuff 371 //Weighted pred stuff
408 int use_weight; 372 int use_weight;
409 int use_weight_chroma; 373 int use_weight_chroma;
410 int luma_log2_weight_denom; 374 int luma_log2_weight_denom;
411 int chroma_log2_weight_denom; 375 int chroma_log2_weight_denom;
412 int luma_weight[2][48]; 377 //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
413 int luma_offset[2][48]; 377 int luma_weight[48][2][2];
414 int chroma_weight[2][48][2]; 378 int chroma_weight[48][2][2][2];
415 int chroma_offset[2][48][2];
416 int implicit_weight[48][48]; 379 int implicit_weight[48][48];
417 380
418 //deblock
419 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
420 int slice_alpha_c0_offset;
421 int slice_beta_offset;
422
423 int redundant_pic_count;
424
425 int direct_spatial_mv_pred; 381 int direct_spatial_mv_pred;
382 int col_parity;
383 int col_fieldoff;
426 int dist_scale_factor[16]; 384 int dist_scale_factor[16];
427 int dist_scale_factor_field[2][32]; 385 int dist_scale_factor_field[2][32];
428 int map_col_to_list0[2][16+32]; 386 int map_col_to_list0[2][16+32];
429 int map_col_to_list0_field[2][2][16+32]; 387 int map_col_to_list0_field[2][2][16+32];
430 388
431 /** 389 /**
432 * num_ref_idx_l0/1_active_minus1 + 1 390 * num_ref_idx_l0/1_active_minus1 + 1
433 */ 391 */
434 uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type 392 uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type
435 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode 393 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
436 unsigned int list_count; 394 unsigned int list_count;
437 Picture *short_ref[32];
438 Picture *long_ref[32];
439 Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
440 Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. 395 Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs.
441 Reordered version of default_ref_list 396 Reordered version of default_ref_list
442 according to picture reordering in slice header */ 397 according to picture reordering in slice header */
443 int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 398 int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1
444 Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
445 Picture *next_output_pic;
446 int outputed_poc;
447 int next_outputed_poc;
448
449 /**
450 * memory management control operations buffer.
451 */
452 MMCO mmco[MAX_MMCO_COUNT];
453 int mmco_index;
454
455 int long_ref_count; ///< number of actual long term references
456 int short_ref_count; ///< number of actual short term references
457 399
458 //data partitioning 400 //data partitioning
459 GetBitContext intra_gb; 401 GetBitContext intra_gb;
460 GetBitContext inter_gb; 402 GetBitContext inter_gb;
461 GetBitContext *intra_gb_ptr; 403 GetBitContext *intra_gb_ptr;
462 GetBitContext *inter_gb_ptr; 404 GetBitContext *inter_gb_ptr;
463 405
464 DECLARE_ALIGNED_16(DCTELEM, mb)[16*24]; 406 DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
465 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb 407 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
466 408
467 /** 409 /**
468 * Cabac 410 * Cabac
469 */ 411 */
470 CABACContext cabac; 412 CABACContext cabac;
471 uint8_t cabac_state[460]; 413 uint8_t cabac_state[460];
472 int cabac_init_idc;
473 414
474 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ 415 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
475 uint16_t *cbp_table; 416 uint16_t *cbp_table;
476 int cbp; 417 int cbp;
477 int top_cbp; 418 int top_cbp;
478 int left_cbp; 419 int left_cbp;
479 /* chroma_pred_mode for i4x4 or i16x16, else 0 */ 420 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
480 uint8_t *chroma_pred_mode_table; 421 uint8_t *chroma_pred_mode_table;
481 int last_qscale_diff; 422 int last_qscale_diff;
482 int16_t (*mvd_table[2])[2]; 423 uint8_t (*mvd_table[2])[2];
483 DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2]; 424 DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2];
484 uint8_t *direct_table; 425 uint8_t *direct_table;
485 uint8_t direct_cache[5*8]; 426 uint8_t direct_cache[5*8];
486 427
487 uint8_t zigzag_scan[16]; 428 uint8_t zigzag_scan[16];
488 uint8_t zigzag_scan8x8[64]; 429 uint8_t zigzag_scan8x8[64];
489 uint8_t zigzag_scan8x8_cavlc[64]; 430 uint8_t zigzag_scan8x8_cavlc[64];
490 uint8_t field_scan[16]; 431 uint8_t field_scan[16];
491 uint8_t field_scan8x8[64]; 432 uint8_t field_scan8x8[64];
492 uint8_t field_scan8x8_cavlc[64]; 433 uint8_t field_scan8x8_cavlc[64];
493 const uint8_t *zigzag_scan_q0; 434 const uint8_t *zigzag_scan_q0;
494 const uint8_t *zigzag_scan8x8_q0; 435 const uint8_t *zigzag_scan8x8_q0;
495 const uint8_t *zigzag_scan8x8_cavlc_q0; 436 const uint8_t *zigzag_scan8x8_cavlc_q0;
496 const uint8_t *field_scan_q0; 437 const uint8_t *field_scan_q0;
497 const uint8_t *field_scan8x8_q0; 438 const uint8_t *field_scan8x8_q0;
498 const uint8_t *field_scan8x8_cavlc_q0; 439 const uint8_t *field_scan8x8_cavlc_q0;
499 440
500 int x264_build; 441 int x264_build;
501 442
443 int mb_xy;
444
445 int is_complex;
446
447 //deblock
448 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
449 int slice_alpha_c0_offset;
450 int slice_beta_offset;
451
452 //=============================================================
453 //Things below are not used in the MB or more inner code
454
455 int nal_ref_idc;
456 int nal_unit_type;
457 uint8_t *rbsp_buffer[2];
458 unsigned int rbsp_buffer_size[2];
459
460 /**
461 * Used to parse AVC variant of h264
462 */
463 int is_avc; ///< this flag is != 0 if codec is avc1
464 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
465
466 SPS *sps_buffers[MAX_SPS_COUNT];
467 PPS *pps_buffers[MAX_PPS_COUNT];
468
469 int dequant_coeff_pps; ///< reinit tables when pps changes
470
471 uint16_t *slice_table_base;
472
473
474 //POC stuff
475 int poc_lsb;
476 int poc_msb;
477 int delta_poc_bottom;
478 int delta_poc[2];
479 int frame_num;
480 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
481 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
482 int frame_num_offset; ///< for POC type 2
483 int prev_frame_num_offset; ///< for POC type 2
484 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
485
486 /**
487 * frame_num for frames or 2*frame_num+1 for field pics.
488 */
489 int curr_pic_num;
490
491 /**
492 * max_frame_num or 2*max_frame_num for field pics.
493 */
494 int max_pic_num;
495
496 int redundant_pic_count;
497
498 Picture *short_ref[32];
499 Picture *long_ref[32];
500 Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
501 Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
502 Picture *next_output_pic;
503 int outputed_poc;
504 int next_outputed_poc;
505
506 /**
507 * memory management control operations buffer.
508 */
509 MMCO mmco[MAX_MMCO_COUNT];
510 int mmco_index;
511
512 int long_ref_count; ///< number of actual long term references
513 int short_ref_count; ///< number of actual short term references
514
515 int cabac_init_idc;
516
502 /** 517 /**
503 * @defgroup multithreading Members for slice based multithreading 518 * @defgroup multithreading Members for slice based multithreading
504 * @{ 519 * @{
505 */ 520 */
506 struct H264Context *thread_context[MAX_THREADS]; 521 struct H264Context *thread_context[MAX_THREADS];
507 522
508 /** 523 /**
509 * current slice number, used to initalize slice_num of each thread/context 524 * current slice number, used to initalize slice_num of each thread/context
510 */ 525 */
511 int current_slice; 526 int current_slice;
512 527
513 /** 528 /**
514 * Max number of threads / contexts. 529 * Max number of threads / contexts.
515 * This is equal to AVCodecContext.thread_count unless 530 * This is equal to AVCodecContext.thread_count unless
516 * multithreaded decoding is impossible, in which case it is 531 * multithreaded decoding is impossible, in which case it is
517 * reduced to 1. 532 * reduced to 1.
518 */ 533 */
519 int max_contexts; 534 int max_contexts;
520 535
521 /** 536 /**
522 * 1 if the single thread fallback warning has already been 537 * 1 if the single thread fallback warning has already been
523 * displayed, 0 otherwise. 538 * displayed, 0 otherwise.
524 */ 539 */
525 int single_decode_warning; 540 int single_decode_warning;
526 541
527 int last_slice_type; 542 int last_slice_type;
528 /** @} */ 543 /** @} */
529 544
530 int mb_xy;
531
532 uint32_t svq3_watermark_key;
533
534 /** 545 /**
535 * pic_struct in picture timing SEI message 546 * pic_struct in picture timing SEI message
536 */ 547 */
537 SEI_PicStructType sei_pic_struct; 548 SEI_PicStructType sei_pic_struct;
538 549
539 /** 550 /**
540 * Complement sei_pic_struct 551 * Complement sei_pic_struct
541 * SEI_PIC_STRUCT_TOP_BOTTOM and SEI_PIC_STRUCT_BOTTOM_TOP indicate interlaced frames. 552 * SEI_PIC_STRUCT_TOP_BOTTOM and SEI_PIC_STRUCT_BOTTOM_TOP indicate interlaced frames.
542 * However, soft telecined frames may have these values. 553 * However, soft telecined frames may have these values.
543 * This is used in an attempt to flag soft telecine progressive. 554 * This is used in an attempt to flag soft telecine progressive.
(...skipping 19 matching lines...) Expand all
563 574
564 /** 575 /**
565 * recovery_frame_cnt from SEI message 576 * recovery_frame_cnt from SEI message
566 * 577 *
567 * Set to -1 if no recovery point SEI message found or to number of frames 578 * Set to -1 if no recovery point SEI message found or to number of frames
568 * before playback synchronizes. Frames having recovery point are key 579 * before playback synchronizes. Frames having recovery point are key
569 * frames. 580 * frames.
570 */ 581 */
571 int sei_recovery_frame_cnt; 582 int sei_recovery_frame_cnt;
572 583
573 int is_complex;
574
575 int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag 584 int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag
576 int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag 585 int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag
577 586
578 // Timestamp stuff 587 // Timestamp stuff
579 int sei_buffering_period_present; ///< Buffering period SEI flag 588 int sei_buffering_period_present; ///< Buffering period SEI flag
580 int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs 589 int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
590
591 //SVQ3 specific fields
592 int halfpel_flag;
593 int thirdpel_flag;
594 int unknown_svq3_flag;
595 int next_slice_index;
596 uint32_t svq3_watermark_key;
581 }H264Context; 597 }H264Context;
582 598
583 599
584 extern const uint8_t ff_h264_chroma_qp[52]; 600 extern const uint8_t ff_h264_chroma_qp[52];
585 601
586 #if CONFIG_SVQ3_DECODER 602 #if CONFIG_SVQ3_DECODER
587 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); 603 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
588 604
589 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc ); 605 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc );
590 #else 606 #else
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
729 }; 745 };
730 746
731 static av_always_inline uint32_t pack16to32(int a, int b){ 747 static av_always_inline uint32_t pack16to32(int a, int b){
732 #if HAVE_BIGENDIAN 748 #if HAVE_BIGENDIAN
733 return (b&0xFFFF) + (a<<16); 749 return (b&0xFFFF) + (a<<16);
734 #else 750 #else
735 return (a&0xFFFF) + (b<<16); 751 return (a&0xFFFF) + (b<<16);
736 #endif 752 #endif
737 } 753 }
738 754
755 static av_always_inline uint16_t pack8to16(int a, int b){
756 #if HAVE_BIGENDIAN
757 return (b&0xFF) + (a<<8);
758 #else
759 return (a&0xFF) + (b<<8);
760 #endif
761 }
762
739 /** 763 /**
740 * gets the chroma qp. 764 * gets the chroma qp.
741 */ 765 */
742 static inline int get_chroma_qp(H264Context *h, int t, int qscale){ 766 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
743 return h->pps.chroma_qp_table[t][qscale]; 767 return h->pps.chroma_qp_table[t][qscale];
744 } 768 }
745 769
746 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); 770 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
747 771
748 static void fill_decode_caches(H264Context *h, int mb_type){ 772 static void fill_decode_neighbors(H264Context *h, int mb_type){
749 MpegEncContext * const s = &h->s; 773 MpegEncContext * const s = &h->s;
750 const int mb_xy= h->mb_xy; 774 const int mb_xy= h->mb_xy;
751 int topleft_xy, top_xy, topright_xy, left_xy[2]; 775 int topleft_xy, top_xy, topright_xy, left_xy[2];
752 int topleft_type, top_type, topright_type, left_type[2];
753 const uint8_t * left_block;
754 int topleft_partition= -1;
755 int i;
756 static const uint8_t left_block_options[4][16]={ 776 static const uint8_t left_block_options[4][16]={
757 {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2* 8}, 777 {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2* 8},
758 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2* 8}, 778 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2* 8},
759 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3* 8}, 779 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3* 8},
760 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3* 8} 780 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3* 8}
761 }; 781 };
762 782
783 h->topleft_partition= -1;
784
763 top_xy = mb_xy - (s->mb_stride << MB_FIELD); 785 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
764 786
765 /* Wow, what a mess, why didn't they simplify the interlacing & intra 787 /* Wow, what a mess, why didn't they simplify the interlacing & intra
766 * stuff, I can't imagine that these complex rules are worth it. */ 788 * stuff, I can't imagine that these complex rules are worth it. */
767 789
768 topleft_xy = top_xy - 1; 790 topleft_xy = top_xy - 1;
769 topright_xy= top_xy + 1; 791 topright_xy= top_xy + 1;
770 left_xy[1] = left_xy[0] = mb_xy-1; 792 left_xy[1] = left_xy[0] = mb_xy-1;
771 left_block = left_block_options[0]; 793 h->left_block = left_block_options[0];
772 if(FRAME_MBAFF){ 794 if(FRAME_MBAFF){
773 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_t ype[mb_xy-1]); 795 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_t ype[mb_xy-1]);
774 const int curr_mb_field_flag = IS_INTERLACED(mb_type); 796 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
775 if(s->mb_y&1){ 797 if(s->mb_y&1){
776 if (left_mb_field_flag != curr_mb_field_flag) { 798 if (left_mb_field_flag != curr_mb_field_flag) {
777 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; 799 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
778 if (curr_mb_field_flag) { 800 if (curr_mb_field_flag) {
779 left_xy[1] += s->mb_stride; 801 left_xy[1] += s->mb_stride;
780 left_block = left_block_options[3]; 802 h->left_block = left_block_options[3];
781 } else { 803 } else {
782 topleft_xy += s->mb_stride; 804 topleft_xy += s->mb_stride;
783 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition 805 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
784 topleft_partition = 0; 806 h->topleft_partition = 0;
785 left_block = left_block_options[1]; 807 h->left_block = left_block_options[1];
786 } 808 }
787 } 809 }
788 }else{ 810 }else{
789 if(curr_mb_field_flag){ 811 if(curr_mb_field_flag){
790 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_ xy - 1]>>7)&1)-1); 812 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_ xy - 1]>>7)&1)-1);
791 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_ xy + 1]>>7)&1)-1); 813 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_ xy + 1]>>7)&1)-1);
792 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_ xy ]>>7)&1)-1); 814 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_ xy ]>>7)&1)-1);
793 } 815 }
794 if (left_mb_field_flag != curr_mb_field_flag) { 816 if (left_mb_field_flag != curr_mb_field_flag) {
795 left_xy[1] = left_xy[0] = mb_xy - 1;
796 if (curr_mb_field_flag) { 817 if (curr_mb_field_flag) {
797 left_xy[1] += s->mb_stride; 818 left_xy[1] += s->mb_stride;
798 left_block = left_block_options[3]; 819 h->left_block = left_block_options[3];
799 } else { 820 } else {
800 left_block = left_block_options[2]; 821 h->left_block = left_block_options[2];
801 } 822 }
802 } 823 }
803 } 824 }
804 } 825 }
805 826
806 h->top_mb_xy = top_xy; 827 h->topleft_mb_xy = topleft_xy;
828 h->top_mb_xy = top_xy;
829 h->topright_mb_xy= topright_xy;
807 h->left_mb_xy[0] = left_xy[0]; 830 h->left_mb_xy[0] = left_xy[0];
808 h->left_mb_xy[1] = left_xy[1]; 831 h->left_mb_xy[1] = left_xy[1];
809 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_ picture.mb_type[topleft_xy] : 0; 832 //FIXME do we need all in the context?
810 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_ picture.mb_type[top_xy] : 0;
811 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_ picture.mb_type[topright_xy]: 0;
812 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_ picture.mb_type[left_xy[0]] : 0;
813 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_ picture.mb_type[left_xy[1]] : 0;
814 833
834 h->topleft_type = s->current_picture.mb_type[topleft_xy] ;
835 h->top_type = s->current_picture.mb_type[top_xy] ;
836 h->topright_type= s->current_picture.mb_type[topright_xy];
837 h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ;
838 h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ;
839
840 if(FMO){
841 if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
842 if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
843 if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_ty pe[1] = 0;
844 }else{
845 if(h->slice_table[topleft_xy ] != h->slice_num){
846 h->topleft_type = 0;
847 if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
848 if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h- >left_type[1] = 0;
849 }
850 }
851 if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
852 }
853
854 static void fill_decode_caches(H264Context *h, int mb_type){
855 MpegEncContext * const s = &h->s;
856 int topleft_xy, top_xy, topright_xy, left_xy[2];
857 int topleft_type, top_type, topright_type, left_type[2];
858 const uint8_t * left_block= h->left_block;
859 int i;
860
861 topleft_xy = h->topleft_mb_xy ;
862 top_xy = h->top_mb_xy ;
863 topright_xy = h->topright_mb_xy;
864 left_xy[0] = h->left_mb_xy[0] ;
865 left_xy[1] = h->left_mb_xy[1] ;
866 topleft_type = h->topleft_type ;
867 top_type = h->top_type ;
868 topright_type= h->topright_type ;
869 left_type[0] = h->left_type[0] ;
870 left_type[1] = h->left_type[1] ;
871
872 if(!IS_SKIP(mb_type)){
815 if(IS_INTRA(mb_type)){ 873 if(IS_INTRA(mb_type)){
816 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; 874 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
817 h->topleft_samples_available= 875 h->topleft_samples_available=
818 h->top_samples_available= 876 h->top_samples_available=
819 h->left_samples_available= 0xFFFF; 877 h->left_samples_available= 0xFFFF;
820 h->topright_samples_available= 0xEEEA; 878 h->topright_samples_available= 0xEEEA;
821 879
822 if(!(top_type & type_mask)){ 880 if(!(top_type & type_mask)){
823 h->topleft_samples_available= 0xB3FF; 881 h->topleft_samples_available= 0xB3FF;
824 h->top_samples_available= 0x33FF; 882 h->top_samples_available= 0x33FF;
825 h->topright_samples_available= 0x26EA; 883 h->topright_samples_available= 0x26EA;
826 } 884 }
827 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ 885 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
828 if(IS_INTERLACED(mb_type)){ 886 if(IS_INTERLACED(mb_type)){
829 if(!(left_type[0] & type_mask)){ 887 if(!(left_type[0] & type_mask)){
830 h->topleft_samples_available&= 0xDFFF; 888 h->topleft_samples_available&= 0xDFFF;
831 h->left_samples_available&= 0x5FFF; 889 h->left_samples_available&= 0x5FFF;
832 } 890 }
833 if(!(left_type[1] & type_mask)){ 891 if(!(left_type[1] & type_mask)){
834 h->topleft_samples_available&= 0xFF5F; 892 h->topleft_samples_available&= 0xFF5F;
835 h->left_samples_available&= 0xFF5F; 893 h->left_samples_available&= 0xFF5F;
836 } 894 }
837 }else{ 895 }else{
838 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num 896 int left_typei = s->current_picture.mb_type[left_xy[0] + s-> mb_stride];
839 ? s->current_picture.mb_type[left_xy[0] + s- >mb_stride] : 0; 897
840 assert(left_xy[0] == left_xy[1]); 898 assert(left_xy[0] == left_xy[1]);
841 if(!((left_typei & type_mask) && (left_type[0] & type_mask)) ){ 899 if(!((left_typei & type_mask) && (left_type[0] & type_mask)) ){
842 h->topleft_samples_available&= 0xDF5F; 900 h->topleft_samples_available&= 0xDF5F;
843 h->left_samples_available&= 0x5F5F; 901 h->left_samples_available&= 0x5F5F;
844 } 902 }
845 } 903 }
846 }else{ 904 }else{
847 if(!(left_type[0] & type_mask)){ 905 if(!(left_type[0] & type_mask)){
848 h->topleft_samples_available&= 0xDF5F; 906 h->topleft_samples_available&= 0xDF5F;
849 h->left_samples_available&= 0x5F5F; 907 h->left_samples_available&= 0x5F5F;
850 } 908 }
851 } 909 }
852 910
853 if(!(topleft_type & type_mask)) 911 if(!(topleft_type & type_mask))
854 h->topleft_samples_available&= 0x7FFF; 912 h->topleft_samples_available&= 0x7FFF;
855 913
856 if(!(topright_type & type_mask)) 914 if(!(topright_type & type_mask))
857 h->topright_samples_available&= 0xFBFF; 915 h->topright_samples_available&= 0xFBFF;
858 916
859 if(IS_INTRA4x4(mb_type)){ 917 if(IS_INTRA4x4(mb_type)){
860 if(IS_INTRA4x4(top_type)){ 918 if(IS_INTRA4x4(top_type)){
861 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[to p_xy][4]; 919 AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pre d_mode + h->mb2br_xy[top_xy]);
862 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[to p_xy][5];
863 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[to p_xy][6];
864 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[to p_xy][3];
865 }else{ 920 }else{
866 int pred;
867 if(!(top_type & type_mask))
868 pred= -1;
869 else{
870 pred= 2;
871 }
872 h->intra4x4_pred_mode_cache[4+8*0]= 921 h->intra4x4_pred_mode_cache[4+8*0]=
873 h->intra4x4_pred_mode_cache[5+8*0]= 922 h->intra4x4_pred_mode_cache[5+8*0]=
874 h->intra4x4_pred_mode_cache[6+8*0]= 923 h->intra4x4_pred_mode_cache[6+8*0]=
875 h->intra4x4_pred_mode_cache[7+8*0]= pred; 924 h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_ mask);
876 } 925 }
877 for(i=0; i<2; i++){ 926 for(i=0; i<2; i++){
878 if(IS_INTRA4x4(left_type[i])){ 927 if(IS_INTRA4x4(left_type[i])){
879 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_ pred_mode[left_xy[i]][left_block[0+2*i]]; 928 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_x y[i]];
880 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_ pred_mode[left_xy[i]][left_block[1+2*i]]; 929 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_ block[0+2*i]];
930 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_ block[1+2*i]];
881 }else{ 931 }else{
882 int pred;
883 if(!(left_type[i] & type_mask))
884 pred= -1;
885 else{
886 pred= 2;
887 }
888 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= 932 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
889 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; 933 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left _type[i] & type_mask);
890 } 934 }
891 } 935 }
892 } 936 }
893 } 937 }
894 938
895 939
896 /* 940 /*
897 0 . T T. T T T T 941 0 . T T. T T T T
898 1 L . .L . . . . 942 1 L . .L . . . .
899 2 L . .L . . . . 943 2 L . .L . . . .
900 3 . T TL . . . . 944 3 . T TL . . . .
901 4 L . .L . . . . 945 4 L . .L . . . .
902 5 L . .. . . . . 946 5 L . .. . . . .
903 */ 947 */
904 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 948 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
905 if(top_type){ 949 if(top_type){
906 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_co unt[top_xy][4+3*8]; 950 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+ 3*8]);
907 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; 951 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
908 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; 952 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
909 953
910 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; 954 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
911 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; 955 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
912 }else { 956 }else {
913 h->non_zero_count_cache[1+8*0]= 957 h->non_zero_count_cache[1+8*0]=
914 h->non_zero_count_cache[2+8*0]= 958 h->non_zero_count_cache[2+8*0]=
915 959
916 h->non_zero_count_cache[1+8*3]= 960 h->non_zero_count_cache[1+8*3]=
917 h->non_zero_count_cache[2+8*3]= 961 h->non_zero_count_cache[2+8*3]=
918 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_t ype) ? 0 : 0x40404040; 962 AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type ) ? 0 : 0x40404040);
919 } 963 }
920 964
921 for (i=0; i<2; i++) { 965 for (i=0; i<2; i++) {
922 if(left_type[i]){ 966 if(left_type[i]){
923 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i] ][left_block[8+0+2*i]]; 967 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i] ][left_block[8+0+2*i]];
924 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i] ][left_block[8+1+2*i]]; 968 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i] ][left_block[8+1+2*i]];
925 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_x y[i]][left_block[8+4+2*i]]; 969 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_x y[i]][left_block[8+4+2*i]];
926 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_x y[i]][left_block[8+5+2*i]]; 970 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_x y[i]][left_block[8+5+2*i]];
927 }else{ 971 }else{
928 h->non_zero_count_cache[3+8*1 + 2*8*i]= 972 h->non_zero_count_cache[3+8*1 + 2*8*i]=
929 h->non_zero_count_cache[3+8*2 + 2*8*i]= 973 h->non_zero_count_cache[3+8*2 + 2*8*i]=
930 h->non_zero_count_cache[0+8*1 + 8*i]= 974 h->non_zero_count_cache[0+8*1 + 8*i]=
931 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_ty pe) ? 0 : 64; 975 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_ty pe) ? 0 : 64;
932 } 976 }
933 } 977 }
934 978
935 if( CABAC ) { 979 if( CABAC ) {
936 // top_cbp 980 // top_cbp
937 if(top_type) { 981 if(top_type) {
938 h->top_cbp = h->cbp_table[top_xy]; 982 h->top_cbp = h->cbp_table[top_xy];
939 } else if(IS_INTRA(mb_type)) {
940 h->top_cbp = 0x1C0;
941 } else { 983 } else {
942 h->top_cbp = 0; 984 h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
943 } 985 }
944 // left_cbp 986 // left_cbp
945 if (left_type[0]) { 987 if (left_type[0]) {
946 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; 988 h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0)
947 } else if(IS_INTRA(mb_type)) { 989 | ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
948 h->left_cbp = 0x1C0; 990 | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
949 } else { 991 } else {
950 h->left_cbp = 0; 992 h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
951 } 993 }
952 if (left_type[0]) { 994 }
953 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1)) &0x1) << 1;
954 }
955 if (left_type[1]) {
956 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1)) &0x1) << 3;
957 }
958 } 995 }
959 996
960 #if 1 997 #if 1
961 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ 998 if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
962 int list; 999 int list;
963 for(list=0; list<h->list_count; list++){ 1000 for(list=0; list<h->list_count; list++){
964 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){ 1001 if(!USES_LIST(mb_type, list)){
965 /*if(!h->mv_cache_clean[list]){ 1002 /*if(!h->mv_cache_clean[list]){
966 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIX ME clean only input? clean at all? 1003 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIX ME clean only input? clean at all?
967 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(in t8_t)); 1004 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(in t8_t));
968 h->mv_cache_clean[list]= 1; 1005 h->mv_cache_clean[list]= 1;
969 }*/ 1006 }*/
970 continue; 1007 continue;
971 } 1008 }
1009 assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
1010
972 h->mv_cache_clean[list]= 0; 1011 h->mv_cache_clean[list]= 0;
973 1012
974 if(USES_LIST(top_type, list)){ 1013 if(USES_LIST(top_type, list)){
975 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 1014 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
976 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
977 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_pic ture.motion_val[list][b_xy + 0]); 1015 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_pic ture.motion_val[list][b_xy + 0]);
978 h->ref_cache[list][scan8[0] + 0 - 1*8]= 1016 h->ref_cache[list][scan8[0] + 0 - 1*8]=
979 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.r ef_index[list][b8_xy + 0]; 1017 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.r ef_index[list][4*top_xy + 2];
980 h->ref_cache[list][scan8[0] + 2 - 1*8]= 1018 h->ref_cache[list][scan8[0] + 2 - 1*8]=
981 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.r ef_index[list][b8_xy + 1]; 1019 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.r ef_index[list][4*top_xy + 3];
982 }else{ 1020 }else{
983 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); 1021 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
984 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; 1022 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? L IST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
985 } 1023 }
986 1024
1025 if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
987 for(i=0; i<2; i++){ 1026 for(i=0; i<2; i++){
988 int cache_idx = scan8[0] - 1 + i*2*8; 1027 int cache_idx = scan8[0] - 1 + i*2*8;
989 if(USES_LIST(left_type[i], list)){ 1028 if(USES_LIST(left_type[i], list)){
990 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; 1029 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
991 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; 1030 const int b8_xy= 4*left_xy[i] + 1;
992 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->c urrent_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; 1031 AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture .motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]);
993 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->c urrent_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; 1032 AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture .motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]);
994 h->ref_cache[list][cache_idx ]= s->current_picture.ref_ index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; 1033 h->ref_cache[list][cache_idx ]= s->current_picture.ref_ index[list][b8_xy + (left_block[0+i*2]&~1)];
995 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_ index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; 1034 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_ index[list][b8_xy + (left_block[1+i*2]&~1)];
996 }else{ 1035 }else{
997 *(uint32_t*)h->mv_cache [list][cache_idx ]= 1036 AV_ZERO32(h->mv_cache [list][cache_idx ]);
998 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; 1037 AV_ZERO32(h->mv_cache [list][cache_idx+8]);
999 h->ref_cache[list][cache_idx ]= 1038 h->ref_cache[list][cache_idx ]=
1000 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_U SED : PART_NOT_AVAILABLE; 1039 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_U SED : PART_NOT_AVAILABLE;
1001 } 1040 }
1002 } 1041 }
1003
1004 if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAF F)
1005 continue;
1006
1007 if(USES_LIST(topleft_type, list)){
1008 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (top left_partition & 2*h->b_stride);
1009 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partitio n & h->b8_stride);
1010 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s ->current_picture.motion_val[list][b_xy];
1011 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_i ndex[list][b8_xy];
1012 }else{ 1042 }else{
1013 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; 1043 if(USES_LIST(left_type[0], list)){
1014 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_ USED : PART_NOT_AVAILABLE; 1044 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1045 const int b8_xy= 4*left_xy[0] + 1;
1046 AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_pictur e.motion_val[list][b_xy + h->b_stride*left_block[0]]);
1047 h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_ind ex[list][b8_xy + (left_block[0]&~1)];
1048 }else{
1049 AV_ZERO32(h->mv_cache [list][scan8[0] - 1]);
1050 h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_US ED : PART_NOT_AVAILABLE;
1051 }
1015 } 1052 }
1016 1053
1017 if(USES_LIST(topright_type, list)){ 1054 if(USES_LIST(topright_type, list)){
1018 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; 1055 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
1019 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; 1056 AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_pict ure.motion_val[list][b_xy]);
1020 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s ->current_picture.motion_val[list][b_xy]; 1057 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_i ndex[list][4*topright_xy + 2];
1021 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_i ndex[list][b8_xy];
1022 }else{ 1058 }else{
1023 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; 1059 AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]);
1024 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT _USED : PART_NOT_AVAILABLE; 1060 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT _USED : PART_NOT_AVAILABLE;
1025 } 1061 }
1062 if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
1063 if(USES_LIST(topleft_type, list)){
1064 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
1065 const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2);
1066 AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_ picture.motion_val[list][b_xy]);
1067 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.r ef_index[list][b8_xy];
1068 }else{
1069 AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]);
1070 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_ NOT_USED : PART_NOT_AVAILABLE;
1071 }
1072 }
1026 1073
1027 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF) 1074 if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
1028 continue; 1075 continue;
1029 1076
1030 h->ref_cache[list][scan8[5 ]+1] = 1077 if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
1031 h->ref_cache[list][scan8[7 ]+1] =
1032 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somew here else)
1033 h->ref_cache[list][scan8[4 ]] = 1078 h->ref_cache[list][scan8[4 ]] =
1034 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; 1079 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
1035 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= 1080 AV_ZERO32(h->mv_cache [list][scan8[4 ]]);
1036 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= 1081 AV_ZERO32(h->mv_cache [list][scan8[12]]);
1037 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 ( init somewhere else)
1038 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
1039 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
1040 1082
1041 if( CABAC ) { 1083 if( CABAC ) {
1042 /* XXX beurk, Load mvd */ 1084 /* XXX beurk, Load mvd */
1043 if(USES_LIST(top_type, list)){ 1085 if(USES_LIST(top_type, list)){
1044 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 1086 const int b_xy= h->mb2br_xy[top_xy];
1045 AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_ta ble[list][b_xy + 0]); 1087 AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_tab le[list][b_xy + 0]);
1046 }else{ 1088 }else{
1047 AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); 1089 AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
1048 } 1090 }
1049 if(USES_LIST(left_type[0], list)){ 1091 if(USES_LIST(left_type[0], list)){
1050 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; 1092 const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
1051 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32 _t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; 1093 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_tab le[list][b_xy - left_block[0]]);
1052 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32 _t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; 1094 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_tab le[list][b_xy - left_block[1]]);
1053 }else{ 1095 }else{
1054 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= 1096 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
1055 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; 1097 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
1056 } 1098 }
1057 if(USES_LIST(left_type[1], list)){ 1099 if(USES_LIST(left_type[1], list)){
1058 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; 1100 const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
1059 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32 _t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; 1101 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_tab le[list][b_xy - left_block[2]]);
1060 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32 _t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; 1102 AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_tab le[list][b_xy - left_block[3]]);
1061 }else{ 1103 }else{
1062 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= 1104 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
1063 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; 1105 AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
1064 } 1106 }
1065 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= 1107 AV_ZERO16(h->mvd_cache [list][scan8[4 ]]);
1066 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= 1108 AV_ZERO16(h->mvd_cache [list][scan8[12]]);
1067 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove pas t 3 (init somewhere else)
1068 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
1069 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
1070
1071 if(h->slice_type_nos == FF_B_TYPE){ 1109 if(h->slice_type_nos == FF_B_TYPE){
1072 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1); 1110 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_ 16x16>>1, 1);
1073 1111
1074 if(IS_DIRECT(top_type)){ 1112 if(IS_DIRECT(top_type)){
1075 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101 ; 1113 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(M B_TYPE_DIRECT2>>1));
1076 }else if(IS_8X8(top_type)){ 1114 }else if(IS_8X8(top_type)){
1077 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; 1115 int b8_xy = 4*top_xy;
1078 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_ xy]; 1116 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_ xy + 2];
1079 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_ xy + 1]; 1117 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_ xy + 3];
1080 }else{ 1118 }else{
1081 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; 1119 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(M B_TYPE_16x16>>1));
1082 } 1120 }
1083 1121
1084 if(IS_DIRECT(left_type[0])) 1122 if(IS_DIRECT(left_type[0]))
1085 h->direct_cache[scan8[0] - 1 + 0*8]= 1; 1123 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
1086 else if(IS_8X8(left_type[0])) 1124 else if(IS_8X8(left_type[0]))
1087 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h-> mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; 1125 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*l eft_xy[0] + 1 + (left_block[0]&~1)];
1088 else 1126 else
1089 h->direct_cache[scan8[0] - 1 + 0*8]= 0; 1127 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;
1090 1128
1091 if(IS_DIRECT(left_type[1])) 1129 if(IS_DIRECT(left_type[1]))
1092 h->direct_cache[scan8[0] - 1 + 2*8]= 1; 1130 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
1093 else if(IS_8X8(left_type[1])) 1131 else if(IS_8X8(left_type[1]))
1094 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h-> mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; 1132 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*l eft_xy[1] + 1 + (left_block[2]&~1)];
1095 else 1133 else
1096 h->direct_cache[scan8[0] - 1 + 2*8]= 0; 1134 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
1097 } 1135 }
1098 } 1136 }
1099 1137 }
1100 if(FRAME_MBAFF){ 1138 if(FRAME_MBAFF){
1101 #define MAP_MVS\ 1139 #define MAP_MVS\
1102 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ 1140 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
1103 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ 1141 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
1104 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ 1142 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
1105 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ 1143 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
1106 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ 1144 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
1107 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ 1145 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
1108 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ 1146 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
1109 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ 1147 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
1110 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ 1148 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
1111 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) 1149 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
1112 if(MB_FIELD){ 1150 if(MB_FIELD){
1113 #define MAP_F2F(idx, mb_type)\ 1151 #define MAP_F2F(idx, mb_type)\
1114 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){ \ 1152 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){ \
1115 h->ref_cache[list][idx] <<= 1;\ 1153 h->ref_cache[list][idx] <<= 1;\
1116 h->mv_cache[list][idx][1] /= 2;\ 1154 h->mv_cache[list][idx][1] /= 2;\
1117 h->mvd_cache[list][idx][1] /= 2;\ 1155 h->mvd_cache[list][idx][1] >>=1;\
1118 } 1156 }
1119 MAP_MVS 1157 MAP_MVS
1120 #undef MAP_F2F 1158 #undef MAP_F2F
1121 }else{ 1159 }else{
1122 #define MAP_F2F(idx, mb_type)\ 1160 #define MAP_F2F(idx, mb_type)\
1123 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ 1161 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
1124 h->ref_cache[list][idx] >>= 1;\ 1162 h->ref_cache[list][idx] >>= 1;\
1125 h->mv_cache[list][idx][1] <<= 1;\ 1163 h->mv_cache[list][idx][1] <<= 1;\
1126 h->mvd_cache[list][idx][1] <<= 1;\ 1164 h->mvd_cache[list][idx][1] <<= 1;\
1127 } 1165 }
(...skipping 10 matching lines...) Expand all
1138 1176
1139 /** 1177 /**
1140 * 1178 *
1141 * @returns non zero if the loop filter can be skipped 1179 * @returns non zero if the loop filter can be skipped
1142 */ 1180 */
1143 static int fill_filter_caches(H264Context *h, int mb_type){ 1181 static int fill_filter_caches(H264Context *h, int mb_type){
1144 MpegEncContext * const s = &h->s; 1182 MpegEncContext * const s = &h->s;
1145 const int mb_xy= h->mb_xy; 1183 const int mb_xy= h->mb_xy;
1146 int top_xy, left_xy[2]; 1184 int top_xy, left_xy[2];
1147 int top_type, left_type[2]; 1185 int top_type, left_type[2];
1148 int i;
1149 1186
1150 top_xy = mb_xy - (s->mb_stride << MB_FIELD); 1187 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
1151 1188
1152 //FIXME deblocking could skip the intra and nnz parts. 1189 //FIXME deblocking could skip the intra and nnz parts.
1153 1190
1154 /* Wow, what a mess, why didn't they simplify the interlacing & intra 1191 /* Wow, what a mess, why didn't they simplify the interlacing & intra
1155 * stuff, I can't imagine that these complex rules are worth it. */ 1192 * stuff, I can't imagine that these complex rules are worth it. */
1156 1193
1157 left_xy[1] = left_xy[0] = mb_xy-1; 1194 left_xy[1] = left_xy[0] = mb_xy-1;
1158 if(FRAME_MBAFF){ 1195 if(FRAME_MBAFF){
(...skipping 25 matching lines...) Expand all
1184 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0] ] + 1)>>1) <= qp_thresh) 1221 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0] ] + 1)>>1) <= qp_thresh)
1185 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ 1222 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
1186 if(!FRAME_MBAFF) 1223 if(!FRAME_MBAFF)
1187 return 1; 1224 return 1;
1188 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_ table[left_xy[1] ] + 1)>>1) <= qp_thresh) 1225 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_ table[left_xy[1] ] + 1)>>1) <= qp_thresh)
1189 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_ table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) 1226 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_ table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
1190 return 1; 1227 return 1;
1191 } 1228 }
1192 } 1229 }
1193 1230
1231 top_type = s->current_picture.mb_type[top_xy] ;
1232 left_type[0] = s->current_picture.mb_type[left_xy[0]];
1233 left_type[1] = s->current_picture.mb_type[left_xy[1]];
1194 if(h->deblocking_filter == 2){ 1234 if(h->deblocking_filter == 2){
1195 h->top_type = top_type = h->slice_table[top_xy ] == h->slice_ num ? s->current_picture.mb_type[top_xy] : 0; 1235 if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
1196 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_ num ? s->current_picture.mb_type[left_xy[0]] : 0; 1236 if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[ 1]= 0;
1197 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_ num ? s->current_picture.mb_type[left_xy[1]] : 0;
1198 }else{ 1237 }else{
1199 h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s ->current_picture.mb_type[top_xy] : 0; 1238 if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
1200 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s ->current_picture.mb_type[left_xy[0]] : 0; 1239 if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
1201 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s ->current_picture.mb_type[left_xy[1]] : 0;
1202 } 1240 }
1241 h->top_type = top_type ;
1242 h->left_type[0]= left_type[0];
1243 h->left_type[1]= left_type[1];
1244
1203 if(IS_INTRA(mb_type)) 1245 if(IS_INTRA(mb_type))
1204 return 0; 1246 return 0;
1205 1247
1206 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); 1248 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
1207 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); 1249 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
1208 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_cou nt[mb_xy][16]); 1250 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
1209 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_cou nt[mb_xy][20]); 1251 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
1210 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); 1252 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
1211 1253
1212 h->cbp= h->cbp_table[mb_xy]; 1254 h->cbp= h->cbp_table[mb_xy];
1213 1255
1214 { 1256 {
1215 int list; 1257 int list;
1216 for(list=0; list<h->list_count; list++){ 1258 for(list=0; list<h->list_count; list++){
1217 int8_t *ref; 1259 int8_t *ref;
1218 int y, b_stride; 1260 int y, b_stride;
1219 int16_t (*mv_dst)[2]; 1261 int16_t (*mv_dst)[2];
1220 int16_t (*mv_src)[2]; 1262 int16_t (*mv_src)[2];
1221 1263
1222 if(!USES_LIST(mb_type, list)){ 1264 if(!USES_LIST(mb_type, list)){
1223 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to3 2(0,0), 4); 1265 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to3 2(0,0), 4);
1224 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = 1266 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)* 0x01010101u);
1225 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = 1267 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)* 0x01010101u);
1226 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = 1268 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)* 0x01010101u);
1227 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0x FF)*0x01010101; 1269 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)* 0x01010101u);
1228 continue; 1270 continue;
1229 } 1271 }
1230 1272
1231 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; 1273 ref = &s->current_picture.ref_index[list][4*mb_xy];
1232 { 1274 {
1233 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0 ] + (MB_MBAFF ? 20 : 2); 1275 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0 ] + (MB_MBAFF ? 20 : 2);
1234 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = 1276 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[lis t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1235 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm [list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; 1277 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[lis t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1236 ref += h->b8_stride; 1278 ref += 2;
1237 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = 1279 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[lis t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1238 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm [list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; 1280 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[lis t][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1239 } 1281 }
1240 1282
1241 b_stride = h->b_stride; 1283 b_stride = h->b_stride;
1242 mv_dst = &h->mv_cache[list][scan8[0]]; 1284 mv_dst = &h->mv_cache[list][scan8[0]];
1243 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_ y*b_stride]; 1285 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_ y*b_stride];
1244 for(y=0; y<4; y++){ 1286 for(y=0; y<4; y++){
1245 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); 1287 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
1246 } 1288 }
1247 1289
1248 } 1290 }
1249 } 1291 }
1250 1292
1251 1293
1252 /* 1294 /*
1253 0 . T T. T T T T 1295 0 . T T. T T T T
1254 1 L . .L . . . . 1296 1 L . .L . . . .
1255 2 L . .L . . . . 1297 2 L . .L . . . .
1256 3 . T TL . . . . 1298 3 . T TL . . . .
1257 4 L . .L . . . . 1299 4 L . .L . . . .
1258 5 L . .. . . . . 1300 5 L . .. . . . .
1259 */ 1301 */
1260 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 1302 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
1261 if(top_type){ 1303 if(top_type){
1262 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_co unt[top_xy][4+3*8]; 1304 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+ 3*8]);
1263 } 1305 }
1264 1306
1265 if(left_type[0]){ 1307 if(left_type[0]){
1266 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; 1308 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
1267 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; 1309 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
1268 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; 1310 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
1269 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; 1311 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
1270 } 1312 }
1271 1313
1272 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from w hat the loop filter needs 1314 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from w hat the loop filter needs
(...skipping 26 matching lines...) Expand all
1299 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[ 1+12]]= 1341 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[ 1+12]]=
1300 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[ 3+12]]= h->cbp & 8; 1342 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[ 3+12]]= h->cbp & 8;
1301 } 1343 }
1302 } 1344 }
1303 1345
1304 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ 1346 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
1305 int list; 1347 int list;
1306 for(list=0; list<h->list_count; list++){ 1348 for(list=0; list<h->list_count; list++){
1307 if(USES_LIST(top_type, list)){ 1349 if(USES_LIST(top_type, list)){
1308 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 1350 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
1309 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; 1351 const int b8_xy= 4*top_xy + 2;
1310 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLI CES-1) ][0] + (MB_MBAFF ? 20 : 2); 1352 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLI CES-1) ][0] + (MB_MBAFF ? 20 : 2);
1311 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_pic ture.motion_val[list][b_xy + 0]); 1353 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_pic ture.motion_val[list][b_xy + 0]);
1312 h->ref_cache[list][scan8[0] + 0 - 1*8]= 1354 h->ref_cache[list][scan8[0] + 0 - 1*8]=
1313 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current _picture.ref_index[list][b8_xy + 0]]; 1355 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current _picture.ref_index[list][b8_xy + 0]];
1314 h->ref_cache[list][scan8[0] + 2 - 1*8]= 1356 h->ref_cache[list][scan8[0] + 2 - 1*8]=
1315 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current _picture.ref_index[list][b8_xy + 1]]; 1357 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current _picture.ref_index[list][b8_xy + 1]];
1316 }else{ 1358 }else{
1317 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); 1359 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
1318 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_ USED)&0xFF)*0x01010101; 1360 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USE D)&0xFF)*0x01010101u);
1319 } 1361 }
1320 1362
1321 if(!IS_INTERLACED(mb_type^left_type[0])){ 1363 if(!IS_INTERLACED(mb_type^left_type[0])){
1322 if(USES_LIST(left_type[0], list)){ 1364 if(USES_LIST(left_type[0], list)){
1323 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; 1365 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1324 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; 1366 const int b8_xy= 4*left_xy[0] + 1;
1325 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]& (MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 1367 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]& (MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1326 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t *)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; 1368 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_p icture.motion_val[list][b_xy + h->b_stride*0]);
1327 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t *)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; 1369 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_p icture.motion_val[list][b_xy + h->b_stride*1]);
1328 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t *)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; 1370 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_p icture.motion_val[list][b_xy + h->b_stride*2]);
1329 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t *)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; 1371 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_p icture.motion_val[list][b_xy + h->b_stride*3]);
1330 h->ref_cache[list][scan8[0] - 1 + 0 ]= 1372 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1331 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->curr ent_picture.ref_index[list][b8_xy + h->b8_stride*0]]; 1373 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->curr ent_picture.ref_index[list][b8_xy + 2*0]];
1332 h->ref_cache[list][scan8[0] - 1 +16 ]= 1374 h->ref_cache[list][scan8[0] - 1 +16 ]=
1333 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->curr ent_picture.ref_index[list][b8_xy + h->b8_stride*1]]; 1375 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->curr ent_picture.ref_index[list][b8_xy + 2*1]];
1334 }else{ 1376 }else{
1335 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= 1377 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
1336 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= 1378 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
1337 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= 1379 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
1338 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; 1380 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
1339 h->ref_cache[list][scan8[0] - 1 + 0 ]= 1381 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1340 h->ref_cache[list][scan8[0] - 1 + 8 ]= 1382 h->ref_cache[list][scan8[0] - 1 + 8 ]=
1341 h->ref_cache[list][scan8[0] - 1 + 16 ]= 1383 h->ref_cache[list][scan8[0] - 1 + 16 ]=
1342 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; 1384 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
1343 } 1385 }
1344 } 1386 }
1345 } 1387 }
1346 } 1388 }
1347 1389
1348 return 0; 1390 return 0;
(...skipping 12 matching lines...) Expand all
1361 1403
1362 if(min<0) return DC_PRED; 1404 if(min<0) return DC_PRED;
1363 else return min; 1405 else return min;
1364 } 1406 }
1365 1407
1366 static inline void write_back_non_zero_count(H264Context *h){ 1408 static inline void write_back_non_zero_count(H264Context *h){
1367 const int mb_xy= h->mb_xy; 1409 const int mb_xy= h->mb_xy;
1368 1410
1369 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); 1411 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]);
1370 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); 1412 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]);
1371 *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_coun t_cache[0+8*5]); 1413 AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]);
1372 *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_coun t_cache[4+8*3]); 1414 AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]);
1373 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); 1415 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]);
1374 } 1416 }
1375 1417
1376 static inline void write_back_motion(H264Context *h, int mb_type){ 1418 static inline void write_back_motion(H264Context *h, int mb_type){
1377 MpegEncContext * const s = &h->s; 1419 MpegEncContext * const s = &h->s;
1378 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; 1420 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
1379 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; 1421 const int b8_xy= 4*h->mb_xy;
1380 int list; 1422 int list;
1381 1423
1382 if(!USES_LIST(mb_type, 0)) 1424 if(!USES_LIST(mb_type, 0))
1383 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stri de, (uint8_t)LIST_NOT_USED, 1); 1425 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_ t)LIST_NOT_USED, 1);
1384 1426
1385 for(list=0; list<h->list_count; list++){ 1427 for(list=0; list<h->list_count; list++){
1386 int y, b_stride; 1428 int y, b_stride;
1387 int16_t (*mv_dst)[2]; 1429 int16_t (*mv_dst)[2];
1388 int16_t (*mv_src)[2]; 1430 int16_t (*mv_src)[2];
1389 1431
1390 if(!USES_LIST(mb_type, list)) 1432 if(!USES_LIST(mb_type, list))
1391 continue; 1433 continue;
1392 1434
1393 b_stride = h->b_stride; 1435 b_stride = h->b_stride;
1394 mv_dst = &s->current_picture.motion_val[list][b_xy]; 1436 mv_dst = &s->current_picture.motion_val[list][b_xy];
1395 mv_src = &h->mv_cache[list][scan8[0]]; 1437 mv_src = &h->mv_cache[list][scan8[0]];
1396 for(y=0; y<4; y++){ 1438 for(y=0; y<4; y++){
1397 AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); 1439 AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
1398 } 1440 }
1399 if( CABAC ) { 1441 if( CABAC ) {
1400 int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; 1442 uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb 2br_xy[h->mb_xy]];
1401 int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; 1443 uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
1402 if(IS_SKIP(mb_type)) 1444 if(IS_SKIP(mb_type))
1403 fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4); 1445 AV_ZERO128(mvd_dst);
1404 else 1446 else{
1405 for(y=0; y<4; y++){ 1447 AV_COPY64(mvd_dst, mvd_src + 8*3);
1406 AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y); 1448 AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
1449 AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
1450 AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
1407 } 1451 }
1408 } 1452 }
1409 1453
1410 { 1454 {
1411 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; 1455 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1412 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; 1456 ref_index[0+0*2]= h->ref_cache[list][scan8[0]];
1413 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; 1457 ref_index[1+0*2]= h->ref_cache[list][scan8[4]];
1414 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; 1458 ref_index[0+1*2]= h->ref_cache[list][scan8[8]];
1415 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; 1459 ref_index[1+1*2]= h->ref_cache[list][scan8[12]];
1416 } 1460 }
1417 } 1461 }
1418 1462
1419 if(h->slice_type_nos == FF_B_TYPE && CABAC){ 1463 if(h->slice_type_nos == FF_B_TYPE && CABAC){
1420 if(IS_8X8(mb_type)){ 1464 if(IS_8X8(mb_type)){
1421 uint8_t *direct_table = &h->direct_table[b8_xy]; 1465 uint8_t *direct_table = &h->direct_table[4*h->mb_xy];
1422 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; 1466 direct_table[1] = h->sub_mb_type[1]>>1;
1423 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; 1467 direct_table[2] = h->sub_mb_type[2]>>1;
1424 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; 1468 direct_table[3] = h->sub_mb_type[3]>>1;
1425 } 1469 }
1426 } 1470 }
1427 } 1471 }
1428 1472
1429 static inline int get_dct8x8_allowed(H264Context *h){ 1473 static inline int get_dct8x8_allowed(H264Context *h){
1430 if(h->sps.direct_8x8_inference_flag) 1474 if(h->sps.direct_8x8_inference_flag)
1431 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYP E_8x8 )*0x0001000100010001ULL)); 1475 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_ 8x8 )*0x0001000100010001ULL));
1432 else 1476 else
1433 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYP E_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); 1477 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_ 8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
1434 }
1435
1436 static void predict_field_decoding_flag(H264Context *h){
1437 MpegEncContext * const s = &h->s;
1438 const int mb_xy= h->mb_xy;
1439 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
1440 ? s->current_picture.mb_type[mb_xy-1]
1441 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
1442 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
1443 : 0;
1444 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
1445 } 1478 }
1446 1479
1447 /** 1480 /**
1448 * decodes a P_SKIP or B_SKIP macroblock 1481 * decodes a P_SKIP or B_SKIP macroblock
1449 */ 1482 */
1450 static void decode_mb_skip(H264Context *h){ 1483 static void decode_mb_skip(H264Context *h){
1451 MpegEncContext * const s = &h->s; 1484 MpegEncContext * const s = &h->s;
1452 const int mb_xy= h->mb_xy; 1485 const int mb_xy= h->mb_xy;
1453 int mb_type=0; 1486 int mb_type=0;
1454 1487
1455 memset(h->non_zero_count[mb_xy], 0, 32); 1488 memset(h->non_zero_count[mb_xy], 0, 32);
1456 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui 1489 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
1457 1490
1458 if(MB_FIELD) 1491 if(MB_FIELD)
1459 mb_type|= MB_TYPE_INTERLACED; 1492 mb_type|= MB_TYPE_INTERLACED;
1460 1493
1461 if( h->slice_type_nos == FF_B_TYPE ) 1494 if( h->slice_type_nos == FF_B_TYPE )
1462 { 1495 {
1463 // just for fill_caches. pred_direct_motion will set the real mb_type 1496 // just for fill_caches. pred_direct_motion will set the real mb_type
1464 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; 1497 mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
1465 1498 if(h->direct_spatial_mv_pred){
1499 fill_decode_neighbors(h, mb_type);
1466 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no t ... 1500 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no t ...
1501 }
1467 ff_h264_pred_direct_motion(h, &mb_type); 1502 ff_h264_pred_direct_motion(h, &mb_type);
1468 mb_type|= MB_TYPE_SKIP; 1503 mb_type|= MB_TYPE_SKIP;
1469 } 1504 }
1470 else 1505 else
1471 { 1506 {
1472 int mx, my; 1507 int mx, my;
1473 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; 1508 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
1474 1509
1510 fill_decode_neighbors(h, mb_type);
1475 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no t ... 1511 fill_decode_caches(h, mb_type); //FIXME check what is needed and what no t ...
1476 pred_pskip_motion(h, &mx, &my); 1512 pred_pskip_motion(h, &mx, &my);
1477 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); 1513 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1478 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4 ); 1514 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4 );
1479 } 1515 }
1480 1516
1481 write_back_motion(h, mb_type); 1517 write_back_motion(h, mb_type);
1482 s->current_picture.mb_type[mb_xy]= mb_type; 1518 s->current_picture.mb_type[mb_xy]= mb_type;
1483 s->current_picture.qscale_table[mb_xy]= s->qscale; 1519 s->current_picture.qscale_table[mb_xy]= s->qscale;
1484 h->slice_table[ mb_xy ]= h->slice_num; 1520 h->slice_table[ mb_xy ]= h->slice_num;
1485 h->prev_mb_skipped= 1; 1521 h->prev_mb_skipped= 1;
1486 } 1522 }
1487 1523
1488 #include "h264_mvpred.h" //For pred_pskip_motion() 1524 #include "h264_mvpred.h" //For pred_pskip_motion()
1489 1525
1490 #endif /* AVCODEC_H264_H */ 1526 #endif /* AVCODEC_H264_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698