Index: patched-ffmpeg-mt/libavcodec/h264.h |
=================================================================== |
--- patched-ffmpeg-mt/libavcodec/h264.h (revision 41250) |
+++ patched-ffmpeg-mt/libavcodec/h264.h (working copy) |
@@ -28,6 +28,7 @@ |
#ifndef AVCODEC_H264_H |
#define AVCODEC_H264_H |
+#include "libavutil/intreadwrite.h" |
#include "dsputil.h" |
#include "cabac.h" |
#include "mpegvideo.h" |
@@ -60,6 +61,8 @@ |
#define ALLOW_NOCHROMA |
+#define FMO 0 |
+ |
/** |
* The maximum number of slices supported by the decoder. |
* must be a power of 2 |
@@ -259,18 +262,6 @@ |
*/ |
typedef struct H264Context{ |
MpegEncContext s; |
- int nal_ref_idc; |
- int nal_unit_type; |
- uint8_t *rbsp_buffer[2]; |
- unsigned int rbsp_buffer_size[2]; |
- |
- /** |
- * Used to parse AVC variant of h264 |
- */ |
- int is_avc; ///< this flag is != 0 if codec is avc1 |
- int got_avcC; ///< flag used to parse avcC data only once |
- int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) |
- |
int chroma_qp[2]; //QPc |
int qp_thresh; ///< QP threshold to skip loopfilter |
@@ -282,27 +273,33 @@ |
int chroma_pred_mode; |
int intra16x16_pred_mode; |
+ int topleft_mb_xy; |
int top_mb_xy; |
+ int topright_mb_xy; |
int left_mb_xy[2]; |
+ int topleft_type; |
int top_type; |
+ int topright_type; |
int left_type[2]; |
+ const uint8_t * left_block; |
+ int topleft_partition; |
+ |
int8_t intra4x4_pred_mode_cache[5*8]; |
- int8_t (*intra4x4_pred_mode)[8]; |
+ int8_t (*intra4x4_pred_mode); |
H264PredContext hpc; |
unsigned int topleft_samples_available; |
unsigned int top_samples_available; |
unsigned int topright_samples_available; |
unsigned int left_samples_available; |
uint8_t (*top_borders[2])[16+2*8]; |
- uint8_t left_border[2*(17+2*9)]; |
/** |
* non zero coeff count cache. |
* is 64 if not available. |
*/ |
- DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8]; |
+ DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8]; |
/* |
.UU.YYYY |
@@ -315,8 +312,8 @@ |
/** |
* Motion vector cache. |
*/ |
- DECLARE_ALIGNED_16(int16_t, mv_cache)[2][5*8][2]; |
- DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8]; |
+ DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2]; |
+ DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8]; |
#define LIST_NOT_USED -1 //FIXME rename? |
#define PART_NOT_AVAILABLE -2 |
@@ -337,9 +334,8 @@ |
int block_offset[2*(16+8)]; |
uint32_t *mb2b_xy; //FIXME are these 4 a good idea? |
- uint32_t *mb2b8_xy; |
+ uint32_t *mb2br_xy; |
int b_stride; //FIXME use s->b4_stride |
- int b8_stride; |
int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff |
int mb_uvlinesize; |
@@ -347,29 +343,19 @@ |
int emu_edge_width; |
int emu_edge_height; |
- int halfpel_flag; |
- int thirdpel_flag; |
- |
- int unknown_svq3_flag; |
- int next_slice_index; |
- |
- SPS *sps_buffers[MAX_SPS_COUNT]; |
SPS sps; ///< current sps |
- PPS *pps_buffers[MAX_PPS_COUNT]; |
/** |
* current pps |
*/ |
PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? |
- uint32_t dequant4_buffer[6][52][16]; |
+ uint32_t dequant4_buffer[6][52][16]; //FIXME should these be moved down? |
uint32_t dequant8_buffer[2][52][64]; |
uint32_t (*dequant4_coeff[6])[16]; |
uint32_t (*dequant8_coeff[2])[64]; |
- int dequant_coeff_pps; ///< reinit tables when pps changes |
int slice_num; |
- uint16_t *slice_table_base; |
uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 |
int slice_type; |
int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) |
@@ -380,49 +366,21 @@ |
int mb_field_decoding_flag; |
int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag |
- DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4]; |
+ DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; |
- //POC stuff |
- int poc_lsb; |
- int poc_msb; |
- int delta_poc_bottom; |
- int delta_poc[2]; |
- int frame_num; |
- int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 |
- int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 |
- int frame_num_offset; ///< for POC type 2 |
- int prev_frame_num_offset; ///< for POC type 2 |
- int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 |
- |
- /** |
- * frame_num for frames or 2*frame_num+1 for field pics. |
- */ |
- int curr_pic_num; |
- |
- /** |
- * max_frame_num or 2*max_frame_num for field pics. |
- */ |
- int max_pic_num; |
- |
//Weighted pred stuff |
int use_weight; |
int use_weight_chroma; |
int luma_log2_weight_denom; |
int chroma_log2_weight_denom; |
- int luma_weight[2][48]; |
- int luma_offset[2][48]; |
- int chroma_weight[2][48][2]; |
- int chroma_offset[2][48][2]; |
+ //The following 2 can be changed to int8_t, but that causes a speed loss of 10 CPU cycles |
+ int luma_weight[48][2][2]; |
+ int chroma_weight[48][2][2][2]; |
int implicit_weight[48][48]; |
- //deblock |
- int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 |
- int slice_alpha_c0_offset; |
- int slice_beta_offset; |
- |
- int redundant_pic_count; |
- |
int direct_spatial_mv_pred; |
+ int col_parity; |
+ int col_fieldoff; |
int dist_scale_factor[16]; |
int dist_scale_factor_field[2][32]; |
int map_col_to_list0[2][16+32]; |
@@ -434,34 +392,18 @@ |
uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type |
unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode |
unsigned int list_count; |
- Picture *short_ref[32]; |
- Picture *long_ref[32]; |
- Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture |
Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. |
Reordered version of default_ref_list |
according to picture reordering in slice header */ |
int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 |
- Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? |
- Picture *next_output_pic; |
- int outputed_poc; |
- int next_outputed_poc; |
- /** |
- * memory management control operations buffer. |
- */ |
- MMCO mmco[MAX_MMCO_COUNT]; |
- int mmco_index; |
- |
- int long_ref_count; ///< number of actual long term references |
- int short_ref_count; ///< number of actual short term references |
- |
//data partitioning |
GetBitContext intra_gb; |
GetBitContext inter_gb; |
GetBitContext *intra_gb_ptr; |
GetBitContext *inter_gb_ptr; |
- DECLARE_ALIGNED_16(DCTELEM, mb)[16*24]; |
+ DECLARE_ALIGNED(16, DCTELEM, mb)[16*24]; |
DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb |
/** |
@@ -469,7 +411,6 @@ |
*/ |
CABACContext cabac; |
uint8_t cabac_state[460]; |
- int cabac_init_idc; |
/* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ |
uint16_t *cbp_table; |
@@ -479,8 +420,8 @@ |
/* chroma_pred_mode for i4x4 or i16x16, else 0 */ |
uint8_t *chroma_pred_mode_table; |
int last_qscale_diff; |
- int16_t (*mvd_table[2])[2]; |
- DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2]; |
+ uint8_t (*mvd_table[2])[2]; |
+ DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2]; |
uint8_t *direct_table; |
uint8_t direct_cache[5*8]; |
@@ -499,7 +440,81 @@ |
int x264_build; |
+ int mb_xy; |
+ |
+ int is_complex; |
+ |
+ //deblock |
+ int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 |
+ int slice_alpha_c0_offset; |
+ int slice_beta_offset; |
+ |
+//============================================================= |
+ //The fields below are not used in the MB-level code or in code nested deeper inside it |
+ |
+ int nal_ref_idc; |
+ int nal_unit_type; |
+ uint8_t *rbsp_buffer[2]; |
+ unsigned int rbsp_buffer_size[2]; |
+ |
/** |
+ * Used to parse AVC variant of h264 |
+ */ |
+ int is_avc; ///< this flag is != 0 if codec is avc1 |
+ int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) |
+ |
+ SPS *sps_buffers[MAX_SPS_COUNT]; |
+ PPS *pps_buffers[MAX_PPS_COUNT]; |
+ |
+ int dequant_coeff_pps; ///< reinit tables when pps changes |
+ |
+ uint16_t *slice_table_base; |
+ |
+ |
+ //POC stuff |
+ int poc_lsb; |
+ int poc_msb; |
+ int delta_poc_bottom; |
+ int delta_poc[2]; |
+ int frame_num; |
+ int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 |
+ int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 |
+ int frame_num_offset; ///< for POC type 2 |
+ int prev_frame_num_offset; ///< for POC type 2 |
+ int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 |
+ |
+ /** |
+ * frame_num for frames or 2*frame_num+1 for field pics. |
+ */ |
+ int curr_pic_num; |
+ |
+ /** |
+ * max_frame_num or 2*max_frame_num for field pics. |
+ */ |
+ int max_pic_num; |
+ |
+ int redundant_pic_count; |
+ |
+ Picture *short_ref[32]; |
+ Picture *long_ref[32]; |
+ Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture |
+ Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? |
+ Picture *next_output_pic; |
+ int outputed_poc; |
+ int next_outputed_poc; |
+ |
+ /** |
+ * memory management control operations buffer. |
+ */ |
+ MMCO mmco[MAX_MMCO_COUNT]; |
+ int mmco_index; |
+ |
+ int long_ref_count; ///< number of actual long term references |
+ int short_ref_count; ///< number of actual short term references |
+ |
+ int cabac_init_idc; |
+ |
+ /** |
* @defgroup multithreading Members for slice based multithreading |
* @{ |
*/ |
@@ -527,10 +542,6 @@ |
int last_slice_type; |
/** @} */ |
- int mb_xy; |
- |
- uint32_t svq3_watermark_key; |
- |
/** |
* pic_struct in picture timing SEI message |
*/ |
@@ -570,14 +581,19 @@ |
*/ |
int sei_recovery_frame_cnt; |
- int is_complex; |
- |
int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag |
int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag |
// Timestamp stuff |
int sei_buffering_period_present; ///< Buffering period SEI flag |
int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs |
+ |
+ //SVQ3 specific fields |
+ int halfpel_flag; |
+ int thirdpel_flag; |
+ int unknown_svq3_flag; |
+ int next_slice_index; |
+ uint32_t svq3_watermark_key; |
}H264Context; |
@@ -736,6 +752,14 @@ |
#endif |
} |
+static av_always_inline uint16_t pack8to16(int a, int b){ /* packs two 8-bit values into one 16-bit word; presumably a ends up as the first byte in memory on either endianness - TODO confirm against callers */ |
+#if HAVE_BIGENDIAN |
+ return (b&0xFF) + (a<<8); /* big-endian build: b masked into bits 0-7, a shifted into bits 8-15 (a is not masked; higher bits are dropped by the uint16_t return type) */ |
+#else |
+ return (a&0xFF) + (b<<8); /* otherwise: a masked into bits 0-7, b shifted into bits 8-15 (b is not masked; higher bits are dropped by the uint16_t return type) */ |
+#endif |
+} |
+ |
/** |
* gets the chroma qp. |
*/ |
@@ -745,14 +769,10 @@ |
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); |
-static void fill_decode_caches(H264Context *h, int mb_type){ |
+static void fill_decode_neighbors(H264Context *h, int mb_type){ |
MpegEncContext * const s = &h->s; |
const int mb_xy= h->mb_xy; |
int topleft_xy, top_xy, topright_xy, left_xy[2]; |
- int topleft_type, top_type, topright_type, left_type[2]; |
- const uint8_t * left_block; |
- int topleft_partition= -1; |
- int i; |
static const uint8_t left_block_options[4][16]={ |
{0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, |
{2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, |
@@ -760,6 +780,8 @@ |
{0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} |
}; |
+ h->topleft_partition= -1; |
+ |
top_xy = mb_xy - (s->mb_stride << MB_FIELD); |
/* Wow, what a mess, why didn't they simplify the interlacing & intra |
@@ -768,7 +790,7 @@ |
topleft_xy = top_xy - 1; |
topright_xy= top_xy + 1; |
left_xy[1] = left_xy[0] = mb_xy-1; |
- left_block = left_block_options[0]; |
+ h->left_block = left_block_options[0]; |
if(FRAME_MBAFF){ |
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); |
const int curr_mb_field_flag = IS_INTERLACED(mb_type); |
@@ -777,12 +799,12 @@ |
left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; |
if (curr_mb_field_flag) { |
left_xy[1] += s->mb_stride; |
- left_block = left_block_options[3]; |
+ h->left_block = left_block_options[3]; |
} else { |
topleft_xy += s->mb_stride; |
// take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition |
- topleft_partition = 0; |
- left_block = left_block_options[1]; |
+ h->topleft_partition = 0; |
+ h->left_block = left_block_options[1]; |
} |
} |
}else{ |
@@ -792,26 +814,62 @@ |
top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); |
} |
if (left_mb_field_flag != curr_mb_field_flag) { |
- left_xy[1] = left_xy[0] = mb_xy - 1; |
if (curr_mb_field_flag) { |
left_xy[1] += s->mb_stride; |
- left_block = left_block_options[3]; |
+ h->left_block = left_block_options[3]; |
} else { |
- left_block = left_block_options[2]; |
+ h->left_block = left_block_options[2]; |
} |
} |
} |
} |
- h->top_mb_xy = top_xy; |
+ h->topleft_mb_xy = topleft_xy; |
+ h->top_mb_xy = top_xy; |
+ h->topright_mb_xy= topright_xy; |
h->left_mb_xy[0] = left_xy[0]; |
h->left_mb_xy[1] = left_xy[1]; |
- topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; |
- top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; |
- topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; |
- left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; |
- left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; |
+ //FIXME do we need all in the context? |
+ h->topleft_type = s->current_picture.mb_type[topleft_xy] ; |
+ h->top_type = s->current_picture.mb_type[top_xy] ; |
+ h->topright_type= s->current_picture.mb_type[topright_xy]; |
+ h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ; |
+ h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ; |
+ |
+ if(FMO){ |
+ if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; |
+ if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; |
+ if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0; |
+ }else{ |
+ if(h->slice_table[topleft_xy ] != h->slice_num){ |
+ h->topleft_type = 0; |
+ if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; |
+ if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0; |
+ } |
+ } |
+ if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; |
+} |
+ |
+static void fill_decode_caches(H264Context *h, int mb_type){ |
+ MpegEncContext * const s = &h->s; |
+ int topleft_xy, top_xy, topright_xy, left_xy[2]; |
+ int topleft_type, top_type, topright_type, left_type[2]; |
+ const uint8_t * left_block= h->left_block; |
+ int i; |
+ |
+ topleft_xy = h->topleft_mb_xy ; |
+ top_xy = h->top_mb_xy ; |
+ topright_xy = h->topright_mb_xy; |
+ left_xy[0] = h->left_mb_xy[0] ; |
+ left_xy[1] = h->left_mb_xy[1] ; |
+ topleft_type = h->topleft_type ; |
+ top_type = h->top_type ; |
+ topright_type= h->topright_type ; |
+ left_type[0] = h->left_type[0] ; |
+ left_type[1] = h->left_type[1] ; |
+ |
+ if(!IS_SKIP(mb_type)){ |
if(IS_INTRA(mb_type)){ |
int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; |
h->topleft_samples_available= |
@@ -835,8 +893,8 @@ |
h->left_samples_available&= 0xFF5F; |
} |
}else{ |
- int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num |
- ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0; |
+ int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride]; |
+ |
assert(left_xy[0] == left_xy[1]); |
if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ |
h->topleft_samples_available&= 0xDF5F; |
@@ -858,35 +916,21 @@ |
if(IS_INTRA4x4(mb_type)){ |
if(IS_INTRA4x4(top_type)){ |
- h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; |
- h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; |
- h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; |
- h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; |
+ AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); |
}else{ |
- int pred; |
- if(!(top_type & type_mask)) |
- pred= -1; |
- else{ |
- pred= 2; |
- } |
h->intra4x4_pred_mode_cache[4+8*0]= |
h->intra4x4_pred_mode_cache[5+8*0]= |
h->intra4x4_pred_mode_cache[6+8*0]= |
- h->intra4x4_pred_mode_cache[7+8*0]= pred; |
+ h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); |
} |
for(i=0; i<2; i++){ |
if(IS_INTRA4x4(left_type[i])){ |
- h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; |
- h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; |
+ int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]]; |
+ h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; |
+ h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; |
}else{ |
- int pred; |
- if(!(left_type[i] & type_mask)) |
- pred= -1; |
- else{ |
- pred= 2; |
- } |
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= |
- h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; |
+ h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask); |
} |
} |
} |
@@ -903,7 +947,7 @@ |
*/ |
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
if(top_type){ |
- *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; |
+ AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; |
h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; |
@@ -915,7 +959,7 @@ |
h->non_zero_count_cache[1+8*3]= |
h->non_zero_count_cache[2+8*3]= |
- *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; |
+ AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040); |
} |
for (i=0; i<2; i++) { |
@@ -936,32 +980,25 @@ |
// top_cbp |
if(top_type) { |
h->top_cbp = h->cbp_table[top_xy]; |
- } else if(IS_INTRA(mb_type)) { |
- h->top_cbp = 0x1C0; |
} else { |
- h->top_cbp = 0; |
+ h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; |
} |
// left_cbp |
if (left_type[0]) { |
- h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; |
- } else if(IS_INTRA(mb_type)) { |
- h->left_cbp = 0x1C0; |
+ h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0) |
+ | ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2) |
+ | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2); |
} else { |
- h->left_cbp = 0; |
+ h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; |
} |
- if (left_type[0]) { |
- h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1; |
- } |
- if (left_type[1]) { |
- h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3; |
- } |
} |
+ } |
#if 1 |
- if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ |
+ if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ |
int list; |
for(list=0; list<h->list_count; list++){ |
- if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){ |
+ if(!USES_LIST(mb_type, list)){ |
/*if(!h->mv_cache_clean[list]){ |
memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? |
memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); |
@@ -969,134 +1006,135 @@ |
}*/ |
continue; |
} |
+ assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); |
+ |
h->mv_cache_clean[list]= 0; |
if(USES_LIST(top_type, list)){ |
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
- const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; |
AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); |
h->ref_cache[list][scan8[0] + 0 - 1*8]= |
- h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; |
+ h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2]; |
h->ref_cache[list][scan8[0] + 2 - 1*8]= |
- h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; |
+ h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3]; |
}else{ |
AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
- *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; |
+ AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101); |
} |
+ if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ |
for(i=0; i<2; i++){ |
int cache_idx = scan8[0] - 1 + i*2*8; |
if(USES_LIST(left_type[i], list)){ |
const int b_xy= h->mb2b_xy[left_xy[i]] + 3; |
- const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; |
- *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; |
- *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; |
- h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; |
- h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; |
+ const int b8_xy= 4*left_xy[i] + 1; |
+ AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]); |
+ AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]); |
+ h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)]; |
+ h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)]; |
}else{ |
- *(uint32_t*)h->mv_cache [list][cache_idx ]= |
- *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; |
+ AV_ZERO32(h->mv_cache [list][cache_idx ]); |
+ AV_ZERO32(h->mv_cache [list][cache_idx+8]); |
h->ref_cache[list][cache_idx ]= |
h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
} |
} |
- |
- if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF) |
- continue; |
- |
- if(USES_LIST(topleft_type, list)){ |
- const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride); |
- const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride); |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; |
- h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
}else{ |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; |
- h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
+ if(USES_LIST(left_type[0], list)){ |
+ const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
+ const int b8_xy= 4*left_xy[0] + 1; |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]); |
+ h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)]; |
+ }else{ |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1]); |
+ h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
+ } |
} |
if(USES_LIST(topright_type, list)){ |
const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; |
- const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; |
- *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; |
- h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
+ AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]); |
+ h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2]; |
}else{ |
- *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; |
+ AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]); |
h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
} |
+ if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){ |
+ if(USES_LIST(topleft_type, list)){ |
+ const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); |
+ const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2); |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]); |
+ h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
+ }else{ |
+ AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]); |
+ h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
+ } |
+ } |
- if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF) |
+ if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) |
continue; |
- h->ref_cache[list][scan8[5 ]+1] = |
- h->ref_cache[list][scan8[7 ]+1] = |
- h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) |
+ if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) { |
h->ref_cache[list][scan8[4 ]] = |
h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; |
- *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= |
- *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= |
- *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) |
- *(uint32_t*)h->mv_cache [list][scan8[4 ]]= |
- *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; |
+ AV_ZERO32(h->mv_cache [list][scan8[4 ]]); |
+ AV_ZERO32(h->mv_cache [list][scan8[12]]); |
if( CABAC ) { |
/* XXX beurk, Load mvd */ |
if(USES_LIST(top_type, list)){ |
- const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
- AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); |
+ const int b_xy= h->mb2br_xy[top_xy]; |
+ AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); |
}else{ |
- AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); |
+ AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]); |
} |
if(USES_LIST(left_type[0], list)){ |
- const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; |
+ const int b_xy= h->mb2br_xy[left_xy[0]] + 6; |
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]); |
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]); |
}else{ |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; |
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]); |
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]); |
} |
if(USES_LIST(left_type[1], list)){ |
- const int b_xy= h->mb2b_xy[left_xy[1]] + 3; |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; |
+ const int b_xy= h->mb2br_xy[left_xy[1]] + 6; |
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]); |
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]); |
}else{ |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; |
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]); |
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]); |
} |
- *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= |
- *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= |
- *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) |
- *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= |
- *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; |
- |
+ AV_ZERO16(h->mvd_cache [list][scan8[4 ]]); |
+ AV_ZERO16(h->mvd_cache [list][scan8[12]]); |
if(h->slice_type_nos == FF_B_TYPE){ |
- fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1); |
+ fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); |
if(IS_DIRECT(top_type)){ |
- *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101; |
+ AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1)); |
}else if(IS_8X8(top_type)){ |
- int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; |
- h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; |
- h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; |
+ int b8_xy = 4*top_xy; |
+ h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2]; |
+ h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3]; |
}else{ |
- *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; |
+ AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1)); |
} |
if(IS_DIRECT(left_type[0])) |
- h->direct_cache[scan8[0] - 1 + 0*8]= 1; |
+ h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; |
else if(IS_8X8(left_type[0])) |
- h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; |
+ h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)]; |
else |
- h->direct_cache[scan8[0] - 1 + 0*8]= 0; |
+ h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1; |
if(IS_DIRECT(left_type[1])) |
- h->direct_cache[scan8[0] - 1 + 2*8]= 1; |
+ h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1; |
else if(IS_8X8(left_type[1])) |
- h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; |
+ h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)]; |
else |
- h->direct_cache[scan8[0] - 1 + 2*8]= 0; |
+ h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1; |
} |
} |
- |
+ } |
if(FRAME_MBAFF){ |
#define MAP_MVS\ |
MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ |
@@ -1114,7 +1152,7 @@ |
if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ |
h->ref_cache[list][idx] <<= 1;\ |
h->mv_cache[list][idx][1] /= 2;\ |
- h->mvd_cache[list][idx][1] /= 2;\ |
+ h->mvd_cache[list][idx][1] >>=1;\ |
} |
MAP_MVS |
#undef MAP_F2F |
@@ -1145,7 +1183,6 @@ |
const int mb_xy= h->mb_xy; |
int top_xy, left_xy[2]; |
int top_type, left_type[2]; |
- int i; |
top_xy = mb_xy - (s->mb_stride << MB_FIELD); |
@@ -1191,22 +1228,27 @@ |
} |
} |
+ top_type = s->current_picture.mb_type[top_xy] ; |
+ left_type[0] = s->current_picture.mb_type[left_xy[0]]; |
+ left_type[1] = s->current_picture.mb_type[left_xy[1]]; |
if(h->deblocking_filter == 2){ |
- h->top_type = top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; |
- h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; |
- h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; |
+ if(h->slice_table[top_xy ] != h->slice_num) top_type= 0; |
+ if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0; |
}else{ |
- h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0; |
- h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0; |
- h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0; |
+ if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0; |
+ if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0; |
} |
+ h->top_type = top_type ; |
+ h->left_type[0]= left_type[0]; |
+ h->left_type[1]= left_type[1]; |
+ |
if(IS_INTRA(mb_type)) |
return 0; |
AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); |
AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); |
- *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]); |
- *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]); |
+ AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); |
+ AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); |
AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); |
h->cbp= h->cbp_table[mb_xy]; |
@@ -1221,21 +1263,21 @@ |
if(!USES_LIST(mb_type, list)){ |
fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); |
- *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = |
- *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = |
- *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = |
- *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101; |
+ AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
+ AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
+ AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
+ AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
continue; |
} |
- ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; |
+ ref = &s->current_picture.ref_index[list][4*mb_xy]; |
{ |
int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
- *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = |
- *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; |
- ref += h->b8_stride; |
- *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = |
- *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; |
+ AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
+ AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
+ ref += 2; |
+ AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
+ AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
} |
b_stride = h->b_stride; |
@@ -1259,7 +1301,7 @@ |
*/ |
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
if(top_type){ |
- *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; |
+ AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
} |
if(left_type[0]){ |
@@ -1306,7 +1348,7 @@ |
for(list=0; list<h->list_count; list++){ |
if(USES_LIST(top_type, list)){ |
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
- const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; |
+ const int b8_xy= 4*top_xy + 2; |
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); |
h->ref_cache[list][scan8[0] + 0 - 1*8]= |
@@ -1315,27 +1357,27 @@ |
h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; |
}else{ |
AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
- *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101; |
+ AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
} |
if(!IS_INTERLACED(mb_type^left_type[0])){ |
if(USES_LIST(left_type[0], list)){ |
const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
- const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; |
+ const int b8_xy= 4*left_xy[0] + 1; |
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); |
h->ref_cache[list][scan8[0] - 1 + 0 ]= |
- h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; |
+ h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]]; |
h->ref_cache[list][scan8[0] - 1 +16 ]= |
- h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; |
+ h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]]; |
}else{ |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); |
h->ref_cache[list][scan8[0] - 1 + 0 ]= |
h->ref_cache[list][scan8[0] - 1 + 8 ]= |
h->ref_cache[list][scan8[0] - 1 + 16 ]= |
@@ -1368,19 +1410,19 @@ |
AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); |
AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); |
- *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]); |
- *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]); |
+ AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]); |
+ AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]); |
AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); |
} |
static inline void write_back_motion(H264Context *h, int mb_type){ |
MpegEncContext * const s = &h->s; |
- const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; |
- const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; |
+ const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b_xy (b8_xy no longer uses a lookup table) |
+ const int b8_xy= 4*h->mb_xy; |
int list; |
if(!USES_LIST(mb_type, 0)) |
- fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); |
+ fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); |
for(list=0; list<h->list_count; list++){ |
int y, b_stride; |
@@ -1397,53 +1439,44 @@ |
AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); |
} |
if( CABAC ) { |
- int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; |
- int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; |
+ uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; |
+ uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; |
if(IS_SKIP(mb_type)) |
- fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4); |
- else |
- for(y=0; y<4; y++){ |
- AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y); |
+ AV_ZERO128(mvd_dst); |
+ else{ |
+ AV_COPY64(mvd_dst, mvd_src + 8*3); |
+ AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); |
+ AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); |
+ AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); |
} |
} |
{ |
int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; |
- ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; |
- ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; |
- ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; |
- ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; |
+ ref_index[0+0*2]= h->ref_cache[list][scan8[0]]; |
+ ref_index[1+0*2]= h->ref_cache[list][scan8[4]]; |
+ ref_index[0+1*2]= h->ref_cache[list][scan8[8]]; |
+ ref_index[1+1*2]= h->ref_cache[list][scan8[12]]; |
} |
} |
if(h->slice_type_nos == FF_B_TYPE && CABAC){ |
if(IS_8X8(mb_type)){ |
- uint8_t *direct_table = &h->direct_table[b8_xy]; |
- direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; |
- direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; |
- direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; |
+ uint8_t *direct_table = &h->direct_table[4*h->mb_xy]; |
+ direct_table[1] = h->sub_mb_type[1]>>1; |
+ direct_table[2] = h->sub_mb_type[2]>>1; |
+ direct_table[3] = h->sub_mb_type[3]>>1; |
} |
} |
} |
static inline int get_dct8x8_allowed(H264Context *h){ |
if(h->sps.direct_8x8_inference_flag) |
- return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); |
+ return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); |
else |
- return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); |
+ return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); |
} |
-static void predict_field_decoding_flag(H264Context *h){ |
- MpegEncContext * const s = &h->s; |
- const int mb_xy= h->mb_xy; |
- int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) |
- ? s->current_picture.mb_type[mb_xy-1] |
- : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) |
- ? s->current_picture.mb_type[mb_xy-s->mb_stride] |
- : 0; |
- h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; |
-} |
- |
/** |
* decodes a P_SKIP or B_SKIP macroblock |
*/ |
@@ -1461,9 +1494,11 @@ |
if( h->slice_type_nos == FF_B_TYPE ) |
{ |
// just for fill_caches. pred_direct_motion will set the real mb_type |
- mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; |
- |
+ mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; |
+ if(h->direct_spatial_mv_pred){ |
+ fill_decode_neighbors(h, mb_type); |
fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... |
+ } |
ff_h264_pred_direct_motion(h, &mb_type); |
mb_type|= MB_TYPE_SKIP; |
} |
@@ -1472,6 +1507,7 @@ |
int mx, my; |
mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; |
+ fill_decode_neighbors(h, mb_type); |
fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... |
pred_pskip_motion(h, &mx, &my); |
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); |