| Index: patched-ffmpeg-mt/libavcodec/h264.h
|
| ===================================================================
|
| --- patched-ffmpeg-mt/libavcodec/h264.h (revision 41250)
|
| +++ patched-ffmpeg-mt/libavcodec/h264.h (working copy)
|
| @@ -28,6 +28,7 @@
|
| #ifndef AVCODEC_H264_H
|
| #define AVCODEC_H264_H
|
|
|
| +#include "libavutil/intreadwrite.h"
|
| #include "dsputil.h"
|
| #include "cabac.h"
|
| #include "mpegvideo.h"
|
| @@ -60,6 +61,8 @@
|
|
|
| #define ALLOW_NOCHROMA
|
|
|
| +#define FMO 0
|
| +
|
| /**
|
| * The maximum number of slices supported by the decoder.
|
| * must be a power of 2
|
| @@ -259,18 +262,6 @@
|
| */
|
| typedef struct H264Context{
|
| MpegEncContext s;
|
| - int nal_ref_idc;
|
| - int nal_unit_type;
|
| - uint8_t *rbsp_buffer[2];
|
| - unsigned int rbsp_buffer_size[2];
|
| -
|
| - /**
|
| - * Used to parse AVC variant of h264
|
| - */
|
| - int is_avc; ///< this flag is != 0 if codec is avc1
|
| - int got_avcC; ///< flag used to parse avcC data only once
|
| - int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
|
| -
|
| int chroma_qp[2]; //QPc
|
|
|
| int qp_thresh; ///< QP threshold to skip loopfilter
|
| @@ -282,27 +273,33 @@
|
| int chroma_pred_mode;
|
| int intra16x16_pred_mode;
|
|
|
| + int topleft_mb_xy;
|
| int top_mb_xy;
|
| + int topright_mb_xy;
|
| int left_mb_xy[2];
|
|
|
| + int topleft_type;
|
| int top_type;
|
| + int topright_type;
|
| int left_type[2];
|
|
|
| + const uint8_t * left_block;
|
| + int topleft_partition;
|
| +
|
| int8_t intra4x4_pred_mode_cache[5*8];
|
| - int8_t (*intra4x4_pred_mode)[8];
|
| + int8_t (*intra4x4_pred_mode);
|
| H264PredContext hpc;
|
| unsigned int topleft_samples_available;
|
| unsigned int top_samples_available;
|
| unsigned int topright_samples_available;
|
| unsigned int left_samples_available;
|
| uint8_t (*top_borders[2])[16+2*8];
|
| - uint8_t left_border[2*(17+2*9)];
|
|
|
| /**
|
| * non zero coeff count cache.
|
| * is 64 if not available.
|
| */
|
| - DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8];
|
| + DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8];
|
|
|
| /*
|
| .UU.YYYY
|
| @@ -315,8 +312,8 @@
|
| /**
|
| * Motion vector cache.
|
| */
|
| - DECLARE_ALIGNED_16(int16_t, mv_cache)[2][5*8][2];
|
| - DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8];
|
| + DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2];
|
| + DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8];
|
| #define LIST_NOT_USED -1 //FIXME rename?
|
| #define PART_NOT_AVAILABLE -2
|
|
|
| @@ -337,9 +334,8 @@
|
| int block_offset[2*(16+8)];
|
|
|
| uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
|
| - uint32_t *mb2b8_xy;
|
| + uint32_t *mb2br_xy;
|
| int b_stride; //FIXME use s->b4_stride
|
| - int b8_stride;
|
|
|
| int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
|
| int mb_uvlinesize;
|
| @@ -347,29 +343,19 @@
|
| int emu_edge_width;
|
| int emu_edge_height;
|
|
|
| - int halfpel_flag;
|
| - int thirdpel_flag;
|
| -
|
| - int unknown_svq3_flag;
|
| - int next_slice_index;
|
| -
|
| - SPS *sps_buffers[MAX_SPS_COUNT];
|
| SPS sps; ///< current sps
|
|
|
| - PPS *pps_buffers[MAX_PPS_COUNT];
|
| /**
|
| * current pps
|
| */
|
| PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
|
|
|
| - uint32_t dequant4_buffer[6][52][16];
|
| + uint32_t dequant4_buffer[6][52][16]; //FIXME should these be moved down?
|
| uint32_t dequant8_buffer[2][52][64];
|
| uint32_t (*dequant4_coeff[6])[16];
|
| uint32_t (*dequant8_coeff[2])[64];
|
| - int dequant_coeff_pps; ///< reinit tables when pps changes
|
|
|
| int slice_num;
|
| - uint16_t *slice_table_base;
|
| uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
|
| int slice_type;
|
| int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P)
|
| @@ -380,49 +366,21 @@
|
| int mb_field_decoding_flag;
|
| int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
|
|
|
| - DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4];
|
| + DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];
|
|
|
| - //POC stuff
|
| - int poc_lsb;
|
| - int poc_msb;
|
| - int delta_poc_bottom;
|
| - int delta_poc[2];
|
| - int frame_num;
|
| - int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
|
| - int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
|
| - int frame_num_offset; ///< for POC type 2
|
| - int prev_frame_num_offset; ///< for POC type 2
|
| - int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
|
| -
|
| - /**
|
| - * frame_num for frames or 2*frame_num+1 for field pics.
|
| - */
|
| - int curr_pic_num;
|
| -
|
| - /**
|
| - * max_frame_num or 2*max_frame_num for field pics.
|
| - */
|
| - int max_pic_num;
|
| -
|
| //Weighted pred stuff
|
| int use_weight;
|
| int use_weight_chroma;
|
| int luma_log2_weight_denom;
|
| int chroma_log2_weight_denom;
|
| - int luma_weight[2][48];
|
| - int luma_offset[2][48];
|
| - int chroma_weight[2][48][2];
|
| - int chroma_offset[2][48][2];
|
| + //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
|
| + int luma_weight[48][2][2];
|
| + int chroma_weight[48][2][2][2];
|
| int implicit_weight[48][48];
|
|
|
| - //deblock
|
| - int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
|
| - int slice_alpha_c0_offset;
|
| - int slice_beta_offset;
|
| -
|
| - int redundant_pic_count;
|
| -
|
| int direct_spatial_mv_pred;
|
| + int col_parity;
|
| + int col_fieldoff;
|
| int dist_scale_factor[16];
|
| int dist_scale_factor_field[2][32];
|
| int map_col_to_list0[2][16+32];
|
| @@ -434,34 +392,18 @@
|
| uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type
|
| unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
|
| unsigned int list_count;
|
| - Picture *short_ref[32];
|
| - Picture *long_ref[32];
|
| - Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
|
| Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs.
|
| Reordered version of default_ref_list
|
| according to picture reordering in slice header */
|
| int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1
|
| - Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
|
| - Picture *next_output_pic;
|
| - int outputed_poc;
|
| - int next_outputed_poc;
|
|
|
| - /**
|
| - * memory management control operations buffer.
|
| - */
|
| - MMCO mmco[MAX_MMCO_COUNT];
|
| - int mmco_index;
|
| -
|
| - int long_ref_count; ///< number of actual long term references
|
| - int short_ref_count; ///< number of actual short term references
|
| -
|
| //data partitioning
|
| GetBitContext intra_gb;
|
| GetBitContext inter_gb;
|
| GetBitContext *intra_gb_ptr;
|
| GetBitContext *inter_gb_ptr;
|
|
|
| - DECLARE_ALIGNED_16(DCTELEM, mb)[16*24];
|
| + DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
|
| DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
|
|
|
| /**
|
| @@ -469,7 +411,6 @@
|
| */
|
| CABACContext cabac;
|
| uint8_t cabac_state[460];
|
| - int cabac_init_idc;
|
|
|
| /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
|
| uint16_t *cbp_table;
|
| @@ -479,8 +420,8 @@
|
| /* chroma_pred_mode for i4x4 or i16x16, else 0 */
|
| uint8_t *chroma_pred_mode_table;
|
| int last_qscale_diff;
|
| - int16_t (*mvd_table[2])[2];
|
| - DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2];
|
| + uint8_t (*mvd_table[2])[2];
|
| + DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2];
|
| uint8_t *direct_table;
|
| uint8_t direct_cache[5*8];
|
|
|
| @@ -499,7 +440,81 @@
|
|
|
| int x264_build;
|
|
|
| + int mb_xy;
|
| +
|
| + int is_complex;
|
| +
|
| + //deblock
|
| + int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
|
| + int slice_alpha_c0_offset;
|
| + int slice_beta_offset;
|
| +
|
| +//=============================================================
|
| + //Things below are not used in the MB or more inner code
|
| +
|
| + int nal_ref_idc;
|
| + int nal_unit_type;
|
| + uint8_t *rbsp_buffer[2];
|
| + unsigned int rbsp_buffer_size[2];
|
| +
|
| /**
|
| + * Used to parse AVC variant of h264
|
| + */
|
| + int is_avc; ///< this flag is != 0 if codec is avc1
|
| + int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
|
| +
|
| + SPS *sps_buffers[MAX_SPS_COUNT];
|
| + PPS *pps_buffers[MAX_PPS_COUNT];
|
| +
|
| + int dequant_coeff_pps; ///< reinit tables when pps changes
|
| +
|
| + uint16_t *slice_table_base;
|
| +
|
| +
|
| + //POC stuff
|
| + int poc_lsb;
|
| + int poc_msb;
|
| + int delta_poc_bottom;
|
| + int delta_poc[2];
|
| + int frame_num;
|
| + int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
|
| + int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
|
| + int frame_num_offset; ///< for POC type 2
|
| + int prev_frame_num_offset; ///< for POC type 2
|
| + int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
|
| +
|
| + /**
|
| + * frame_num for frames or 2*frame_num+1 for field pics.
|
| + */
|
| + int curr_pic_num;
|
| +
|
| + /**
|
| + * max_frame_num or 2*max_frame_num for field pics.
|
| + */
|
| + int max_pic_num;
|
| +
|
| + int redundant_pic_count;
|
| +
|
| + Picture *short_ref[32];
|
| + Picture *long_ref[32];
|
| + Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
|
| + Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
|
| + Picture *next_output_pic;
|
| + int outputed_poc;
|
| + int next_outputed_poc;
|
| +
|
| + /**
|
| + * memory management control operations buffer.
|
| + */
|
| + MMCO mmco[MAX_MMCO_COUNT];
|
| + int mmco_index;
|
| +
|
| + int long_ref_count; ///< number of actual long term references
|
| + int short_ref_count; ///< number of actual short term references
|
| +
|
| + int cabac_init_idc;
|
| +
|
| + /**
|
| * @defgroup multithreading Members for slice based multithreading
|
| * @{
|
| */
|
| @@ -527,10 +542,6 @@
|
| int last_slice_type;
|
| /** @} */
|
|
|
| - int mb_xy;
|
| -
|
| - uint32_t svq3_watermark_key;
|
| -
|
| /**
|
| * pic_struct in picture timing SEI message
|
| */
|
| @@ -570,14 +581,19 @@
|
| */
|
| int sei_recovery_frame_cnt;
|
|
|
| - int is_complex;
|
| -
|
| int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag
|
| int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag
|
|
|
| // Timestamp stuff
|
| int sei_buffering_period_present; ///< Buffering period SEI flag
|
| int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
|
| +
|
| + //SVQ3 specific fields
|
| + int halfpel_flag;
|
| + int thirdpel_flag;
|
| + int unknown_svq3_flag;
|
| + int next_slice_index;
|
| + uint32_t svq3_watermark_key;
|
| }H264Context;
|
|
|
|
|
| @@ -736,6 +752,14 @@
|
| #endif
|
| }
|
|
|
| +static av_always_inline uint16_t pack8to16(int a, int b){
|
| +#if HAVE_BIGENDIAN
|
| + return (b&0xFF) + (a<<8);
|
| +#else
|
| + return (a&0xFF) + (b<<8);
|
| +#endif
|
| +}
|
| +
|
| /**
|
| * gets the chroma qp.
|
| */
|
| @@ -745,14 +769,10 @@
|
|
|
| static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
|
|
|
| -static void fill_decode_caches(H264Context *h, int mb_type){
|
| +static void fill_decode_neighbors(H264Context *h, int mb_type){
|
| MpegEncContext * const s = &h->s;
|
| const int mb_xy= h->mb_xy;
|
| int topleft_xy, top_xy, topright_xy, left_xy[2];
|
| - int topleft_type, top_type, topright_type, left_type[2];
|
| - const uint8_t * left_block;
|
| - int topleft_partition= -1;
|
| - int i;
|
| static const uint8_t left_block_options[4][16]={
|
| {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
|
| {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
|
| @@ -760,6 +780,8 @@
|
| {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
|
| };
|
|
|
| + h->topleft_partition= -1;
|
| +
|
| top_xy = mb_xy - (s->mb_stride << MB_FIELD);
|
|
|
| /* Wow, what a mess, why didn't they simplify the interlacing & intra
|
| @@ -768,7 +790,7 @@
|
| topleft_xy = top_xy - 1;
|
| topright_xy= top_xy + 1;
|
| left_xy[1] = left_xy[0] = mb_xy-1;
|
| - left_block = left_block_options[0];
|
| + h->left_block = left_block_options[0];
|
| if(FRAME_MBAFF){
|
| const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
|
| const int curr_mb_field_flag = IS_INTERLACED(mb_type);
|
| @@ -777,12 +799,12 @@
|
| left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
|
| if (curr_mb_field_flag) {
|
| left_xy[1] += s->mb_stride;
|
| - left_block = left_block_options[3];
|
| + h->left_block = left_block_options[3];
|
| } else {
|
| topleft_xy += s->mb_stride;
|
| // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
|
| - topleft_partition = 0;
|
| - left_block = left_block_options[1];
|
| + h->topleft_partition = 0;
|
| + h->left_block = left_block_options[1];
|
| }
|
| }
|
| }else{
|
| @@ -792,26 +814,62 @@
|
| top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
|
| }
|
| if (left_mb_field_flag != curr_mb_field_flag) {
|
| - left_xy[1] = left_xy[0] = mb_xy - 1;
|
| if (curr_mb_field_flag) {
|
| left_xy[1] += s->mb_stride;
|
| - left_block = left_block_options[3];
|
| + h->left_block = left_block_options[3];
|
| } else {
|
| - left_block = left_block_options[2];
|
| + h->left_block = left_block_options[2];
|
| }
|
| }
|
| }
|
| }
|
|
|
| - h->top_mb_xy = top_xy;
|
| + h->topleft_mb_xy = topleft_xy;
|
| + h->top_mb_xy = top_xy;
|
| + h->topright_mb_xy= topright_xy;
|
| h->left_mb_xy[0] = left_xy[0];
|
| h->left_mb_xy[1] = left_xy[1];
|
| - topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
|
| - top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
|
| - topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
|
| - left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
|
| - left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
|
| + //FIXME do we need all in the context?
|
|
|
| + h->topleft_type = s->current_picture.mb_type[topleft_xy] ;
|
| + h->top_type = s->current_picture.mb_type[top_xy] ;
|
| + h->topright_type= s->current_picture.mb_type[topright_xy];
|
| + h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ;
|
| + h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ;
|
| +
|
| + if(FMO){
|
| + if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
|
| + if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
|
| + if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
|
| + }else{
|
| + if(h->slice_table[topleft_xy ] != h->slice_num){
|
| + h->topleft_type = 0;
|
| + if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
|
| + if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
|
| + }
|
| + }
|
| + if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
|
| +}
|
| +
|
| +static void fill_decode_caches(H264Context *h, int mb_type){
|
| + MpegEncContext * const s = &h->s;
|
| + int topleft_xy, top_xy, topright_xy, left_xy[2];
|
| + int topleft_type, top_type, topright_type, left_type[2];
|
| + const uint8_t * left_block= h->left_block;
|
| + int i;
|
| +
|
| + topleft_xy = h->topleft_mb_xy ;
|
| + top_xy = h->top_mb_xy ;
|
| + topright_xy = h->topright_mb_xy;
|
| + left_xy[0] = h->left_mb_xy[0] ;
|
| + left_xy[1] = h->left_mb_xy[1] ;
|
| + topleft_type = h->topleft_type ;
|
| + top_type = h->top_type ;
|
| + topright_type= h->topright_type ;
|
| + left_type[0] = h->left_type[0] ;
|
| + left_type[1] = h->left_type[1] ;
|
| +
|
| + if(!IS_SKIP(mb_type)){
|
| if(IS_INTRA(mb_type)){
|
| int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
|
| h->topleft_samples_available=
|
| @@ -835,8 +893,8 @@
|
| h->left_samples_available&= 0xFF5F;
|
| }
|
| }else{
|
| - int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
|
| - ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
|
| + int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride];
|
| +
|
| assert(left_xy[0] == left_xy[1]);
|
| if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
|
| h->topleft_samples_available&= 0xDF5F;
|
| @@ -858,35 +916,21 @@
|
|
|
| if(IS_INTRA4x4(mb_type)){
|
| if(IS_INTRA4x4(top_type)){
|
| - h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
|
| - h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
|
| - h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
|
| - h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
|
| + AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
|
| }else{
|
| - int pred;
|
| - if(!(top_type & type_mask))
|
| - pred= -1;
|
| - else{
|
| - pred= 2;
|
| - }
|
| h->intra4x4_pred_mode_cache[4+8*0]=
|
| h->intra4x4_pred_mode_cache[5+8*0]=
|
| h->intra4x4_pred_mode_cache[6+8*0]=
|
| - h->intra4x4_pred_mode_cache[7+8*0]= pred;
|
| + h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
|
| }
|
| for(i=0; i<2; i++){
|
| if(IS_INTRA4x4(left_type[i])){
|
| - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
|
| - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
|
| + int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
|
| + h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
|
| + h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
|
| }else{
|
| - int pred;
|
| - if(!(left_type[i] & type_mask))
|
| - pred= -1;
|
| - else{
|
| - pred= 2;
|
| - }
|
| h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
|
| - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
|
| + h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask);
|
| }
|
| }
|
| }
|
| @@ -903,7 +947,7 @@
|
| */
|
| //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
|
| if(top_type){
|
| - *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
|
| + AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
|
| h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
|
| h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
|
|
|
| @@ -915,7 +959,7 @@
|
|
|
| h->non_zero_count_cache[1+8*3]=
|
| h->non_zero_count_cache[2+8*3]=
|
| - *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
|
| + AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040);
|
| }
|
|
|
| for (i=0; i<2; i++) {
|
| @@ -936,32 +980,25 @@
|
| // top_cbp
|
| if(top_type) {
|
| h->top_cbp = h->cbp_table[top_xy];
|
| - } else if(IS_INTRA(mb_type)) {
|
| - h->top_cbp = 0x1C0;
|
| } else {
|
| - h->top_cbp = 0;
|
| + h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
|
| }
|
| // left_cbp
|
| if (left_type[0]) {
|
| - h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
|
| - } else if(IS_INTRA(mb_type)) {
|
| - h->left_cbp = 0x1C0;
|
| + h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0)
|
| + | ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
|
| + | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
|
| } else {
|
| - h->left_cbp = 0;
|
| + h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
|
| }
|
| - if (left_type[0]) {
|
| - h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
|
| - }
|
| - if (left_type[1]) {
|
| - h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
|
| - }
|
| }
|
| + }
|
|
|
| #if 1
|
| - if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
|
| + if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
|
| int list;
|
| for(list=0; list<h->list_count; list++){
|
| - if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){
|
| + if(!USES_LIST(mb_type, list)){
|
| /*if(!h->mv_cache_clean[list]){
|
| memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
|
| memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
|
| @@ -969,134 +1006,135 @@
|
| }*/
|
| continue;
|
| }
|
| + assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
|
| +
|
| h->mv_cache_clean[list]= 0;
|
|
|
| if(USES_LIST(top_type, list)){
|
| const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
|
| - const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
|
| AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
|
| h->ref_cache[list][scan8[0] + 0 - 1*8]=
|
| - h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
|
| + h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2];
|
| h->ref_cache[list][scan8[0] + 2 - 1*8]=
|
| - h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
|
| + h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3];
|
| }else{
|
| AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
|
| - *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
|
| + AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
|
| }
|
|
|
| + if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
|
| for(i=0; i<2; i++){
|
| int cache_idx = scan8[0] - 1 + i*2*8;
|
| if(USES_LIST(left_type[i], list)){
|
| const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
|
| - const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
|
| - *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
|
| - *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
|
| - h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
|
| - h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
|
| + const int b8_xy= 4*left_xy[i] + 1;
|
| + AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]);
|
| + AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]);
|
| + h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)];
|
| + h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)];
|
| }else{
|
| - *(uint32_t*)h->mv_cache [list][cache_idx ]=
|
| - *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
|
| + AV_ZERO32(h->mv_cache [list][cache_idx ]);
|
| + AV_ZERO32(h->mv_cache [list][cache_idx+8]);
|
| h->ref_cache[list][cache_idx ]=
|
| h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
|
| }
|
| }
|
| -
|
| - if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF)
|
| - continue;
|
| -
|
| - if(USES_LIST(topleft_type, list)){
|
| - const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
|
| - const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
|
| - *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
|
| - h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
|
| }else{
|
| - *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
|
| - h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
|
| + if(USES_LIST(left_type[0], list)){
|
| + const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
|
| + const int b8_xy= 4*left_xy[0] + 1;
|
| + AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]);
|
| + h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)];
|
| + }else{
|
| + AV_ZERO32(h->mv_cache [list][scan8[0] - 1]);
|
| + h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
|
| + }
|
| }
|
|
|
| if(USES_LIST(topright_type, list)){
|
| const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
|
| - const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
|
| - *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
|
| - h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
|
| + AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]);
|
| + h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2];
|
| }else{
|
| - *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
|
| + AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]);
|
| h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
|
| }
|
| + if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
|
| + if(USES_LIST(topleft_type, list)){
|
| + const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
|
| + const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2);
|
| + AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]);
|
| + h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
|
| + }else{
|
| + AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]);
|
| + h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
|
| + }
|
| + }
|
|
|
| - if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
|
| + if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
|
| continue;
|
|
|
| - h->ref_cache[list][scan8[5 ]+1] =
|
| - h->ref_cache[list][scan8[7 ]+1] =
|
| - h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
|
| + if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
|
| h->ref_cache[list][scan8[4 ]] =
|
| h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
|
| - *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
|
| - *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
|
| - *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
|
| - *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
|
| - *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
|
| + AV_ZERO32(h->mv_cache [list][scan8[4 ]]);
|
| + AV_ZERO32(h->mv_cache [list][scan8[12]]);
|
|
|
| if( CABAC ) {
|
| /* XXX beurk, Load mvd */
|
| if(USES_LIST(top_type, list)){
|
| - const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
|
| - AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
|
| + const int b_xy= h->mb2br_xy[top_xy];
|
| + AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
|
| }else{
|
| - AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
|
| + AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
|
| }
|
| if(USES_LIST(left_type[0], list)){
|
| - const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
|
| - *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
|
| - *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
|
| + const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
|
| + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]);
|
| + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]);
|
| }else{
|
| - *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
|
| - *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
|
| + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
|
| + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
|
| }
|
| if(USES_LIST(left_type[1], list)){
|
| - const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
|
| - *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
|
| - *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
|
| + const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
|
| + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]);
|
| + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]);
|
| }else{
|
| - *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
|
| - *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
|
| + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
|
| + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
|
| }
|
| - *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
|
| - *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
|
| - *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
|
| - *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
|
| - *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
|
| -
|
| + AV_ZERO16(h->mvd_cache [list][scan8[4 ]]);
|
| + AV_ZERO16(h->mvd_cache [list][scan8[12]]);
|
| if(h->slice_type_nos == FF_B_TYPE){
|
| - fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
|
| + fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
|
|
|
| if(IS_DIRECT(top_type)){
|
| - *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
|
| + AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1));
|
| }else if(IS_8X8(top_type)){
|
| - int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
|
| - h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
|
| - h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
|
| + int b8_xy = 4*top_xy;
|
| + h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2];
|
| + h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3];
|
| }else{
|
| - *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
|
| + AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
|
| }
|
|
|
| if(IS_DIRECT(left_type[0]))
|
| - h->direct_cache[scan8[0] - 1 + 0*8]= 1;
|
| + h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
|
| else if(IS_8X8(left_type[0]))
|
| - h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
|
| + h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
|
| else
|
| - h->direct_cache[scan8[0] - 1 + 0*8]= 0;
|
| + h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;
|
|
|
| if(IS_DIRECT(left_type[1]))
|
| - h->direct_cache[scan8[0] - 1 + 2*8]= 1;
|
| + h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
|
| else if(IS_8X8(left_type[1]))
|
| - h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
|
| + h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
|
| else
|
| - h->direct_cache[scan8[0] - 1 + 2*8]= 0;
|
| + h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
|
| }
|
| }
|
| -
|
| + }
|
| if(FRAME_MBAFF){
|
| #define MAP_MVS\
|
| MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
|
| @@ -1114,7 +1152,7 @@
|
| if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
|
| h->ref_cache[list][idx] <<= 1;\
|
| h->mv_cache[list][idx][1] /= 2;\
|
| - h->mvd_cache[list][idx][1] /= 2;\
|
| + h->mvd_cache[list][idx][1] >>=1;\
|
| }
|
| MAP_MVS
|
| #undef MAP_F2F
|
| @@ -1145,7 +1183,6 @@
|
| const int mb_xy= h->mb_xy;
|
| int top_xy, left_xy[2];
|
| int top_type, left_type[2];
|
| - int i;
|
|
|
| top_xy = mb_xy - (s->mb_stride << MB_FIELD);
|
|
|
| @@ -1191,22 +1228,27 @@
|
| }
|
| }
|
|
|
| + top_type = s->current_picture.mb_type[top_xy] ;
|
| + left_type[0] = s->current_picture.mb_type[left_xy[0]];
|
| + left_type[1] = s->current_picture.mb_type[left_xy[1]];
|
| if(h->deblocking_filter == 2){
|
| - h->top_type = top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
|
| - h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
|
| - h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
|
| + if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
|
| + if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
|
| }else{
|
| - h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
|
| - h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
|
| - h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
|
| + if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
|
| + if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
|
| }
|
| + h->top_type = top_type ;
|
| + h->left_type[0]= left_type[0];
|
| + h->left_type[1]= left_type[1];
|
| +
|
| if(IS_INTRA(mb_type))
|
| return 0;
|
|
|
| AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
|
| AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
|
| - *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
|
| - *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
|
| + AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
|
| + AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
|
| AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
|
|
|
| h->cbp= h->cbp_table[mb_xy];
|
| @@ -1221,21 +1263,21 @@
|
|
|
| if(!USES_LIST(mb_type, list)){
|
| fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
|
| - *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
|
| - *(uint32_t*)&h->ref_cache[list][scan8[ 2]] =
|
| - *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
|
| - *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101;
|
| + AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
|
| + AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
|
| + AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
|
| + AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
|
| continue;
|
| }
|
|
|
| - ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
|
| + ref = &s->current_picture.ref_index[list][4*mb_xy];
|
| {
|
| int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
|
| - *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
|
| - *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
|
| - ref += h->b8_stride;
|
| - *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
|
| - *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
|
| + AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
|
| + AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
|
| + ref += 2;
|
| + AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
|
| + AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
|
| }
|
|
|
| b_stride = h->b_stride;
|
| @@ -1259,7 +1301,7 @@
|
| */
|
| //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
|
| if(top_type){
|
| - *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
|
| + AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
|
| }
|
|
|
| if(left_type[0]){
|
| @@ -1306,7 +1348,7 @@
|
| for(list=0; list<h->list_count; list++){
|
| if(USES_LIST(top_type, list)){
|
| const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
|
| - const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
|
| + const int b8_xy= 4*top_xy + 2;
|
| int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
|
| AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
|
| h->ref_cache[list][scan8[0] + 0 - 1*8]=
|
| @@ -1315,27 +1357,27 @@
|
| h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
|
| }else{
|
| AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
|
| - *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101;
|
| + AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
|
| }
|
|
|
| if(!IS_INTERLACED(mb_type^left_type[0])){
|
| if(USES_LIST(left_type[0], list)){
|
| const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
|
| - const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
|
| + const int b8_xy= 4*left_xy[0] + 1;
|
| int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
|
| - *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0];
|
| - *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1];
|
| - *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2];
|
| - *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3];
|
| + AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
|
| + AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
|
| + AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
|
| + AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
|
| h->ref_cache[list][scan8[0] - 1 + 0 ]=
|
| - h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]];
|
| + h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
|
| h->ref_cache[list][scan8[0] - 1 +16 ]=
|
| - h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]];
|
| + h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
|
| }else{
|
| - *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]=
|
| - *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]=
|
| - *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]=
|
| - *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0;
|
| + AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
|
| + AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
|
| + AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
|
| + AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
|
| h->ref_cache[list][scan8[0] - 1 + 0 ]=
|
| h->ref_cache[list][scan8[0] - 1 + 8 ]=
|
| h->ref_cache[list][scan8[0] - 1 + 16 ]=
|
| @@ -1368,19 +1410,19 @@
|
|
|
| AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]);
|
| AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]);
|
| - *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]);
|
| - *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]);
|
| + AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]);
|
| + AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]);
|
| AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]);
|
| }
|
|
|
| static inline void write_back_motion(H264Context *h, int mb_type){
|
| MpegEncContext * const s = &h->s;
|
| - const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
|
| - const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
|
| + const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
|
| + const int b8_xy= 4*h->mb_xy;
|
| int list;
|
|
|
| if(!USES_LIST(mb_type, 0))
|
| - fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
|
| + fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
|
|
|
| for(list=0; list<h->list_count; list++){
|
| int y, b_stride;
|
| @@ -1397,53 +1439,44 @@
|
| AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
|
| }
|
| if( CABAC ) {
|
| - int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy];
|
| - int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
|
| + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
|
| + uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
|
| if(IS_SKIP(mb_type))
|
| - fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4);
|
| - else
|
| - for(y=0; y<4; y++){
|
| - AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y);
|
| + AV_ZERO128(mvd_dst);
|
| + else{
|
| + AV_COPY64(mvd_dst, mvd_src + 8*3);
|
| + AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
|
| + AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
|
| + AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
|
| }
|
| }
|
|
|
| {
|
| int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
|
| - ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
|
| - ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
|
| - ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
|
| - ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
|
| + ref_index[0+0*2]= h->ref_cache[list][scan8[0]];
|
| + ref_index[1+0*2]= h->ref_cache[list][scan8[4]];
|
| + ref_index[0+1*2]= h->ref_cache[list][scan8[8]];
|
| + ref_index[1+1*2]= h->ref_cache[list][scan8[12]];
|
| }
|
| }
|
|
|
| if(h->slice_type_nos == FF_B_TYPE && CABAC){
|
| if(IS_8X8(mb_type)){
|
| - uint8_t *direct_table = &h->direct_table[b8_xy];
|
| - direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
|
| - direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
|
| - direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
|
| + uint8_t *direct_table = &h->direct_table[4*h->mb_xy];
|
| + direct_table[1] = h->sub_mb_type[1]>>1;
|
| + direct_table[2] = h->sub_mb_type[2]>>1;
|
| + direct_table[3] = h->sub_mb_type[3]>>1;
|
| }
|
| }
|
| }
|
|
|
| static inline int get_dct8x8_allowed(H264Context *h){
|
| if(h->sps.direct_8x8_inference_flag)
|
| - return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
|
| + return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
|
| else
|
| - return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
|
| + return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
|
| }
|
|
|
| -static void predict_field_decoding_flag(H264Context *h){
|
| - MpegEncContext * const s = &h->s;
|
| - const int mb_xy= h->mb_xy;
|
| - int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
|
| - ? s->current_picture.mb_type[mb_xy-1]
|
| - : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
|
| - ? s->current_picture.mb_type[mb_xy-s->mb_stride]
|
| - : 0;
|
| - h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
|
| -}
|
| -
|
| /**
|
| * decodes a P_SKIP or B_SKIP macroblock
|
| */
|
| @@ -1461,9 +1494,11 @@
|
| if( h->slice_type_nos == FF_B_TYPE )
|
| {
|
| // just for fill_caches. pred_direct_motion will set the real mb_type
|
| - mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
|
| -
|
| + mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
|
| + if(h->direct_spatial_mv_pred){
|
| + fill_decode_neighbors(h, mb_type);
|
| fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
|
| + }
|
| ff_h264_pred_direct_motion(h, &mb_type);
|
| mb_type|= MB_TYPE_SKIP;
|
| }
|
| @@ -1472,6 +1507,7 @@
|
| int mx, my;
|
| mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
|
|
|
| + fill_decode_neighbors(h, mb_type);
|
| fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
|
| pred_pskip_motion(h, &mx, &my);
|
| fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
|
|
|