Index: source/libvpx/vp8/encoder/ethreading.c
===================================================================
--- source/libvpx/vp8/encoder/ethreading.c	(revision 96967)
+++ source/libvpx/vp8/encoder/ethreading.c	(working copy)
@@ -91,8 +91,8 @@
                 int dst_fb_idx = cm->new_fb_idx;
                 int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
                 int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+                int map_index = (mb_row * cm->mb_cols);
                 volatile int *last_row_current_mb_col;
-                INT64 activity_sum = 0;

                 tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24));

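The new map_index hoists the row offset out of the column loop: the per-MB maps touched below (segmentation, activity, cyclic refresh) are row-major arrays with one entry per macroblock, so the offset only changes once per row. A minimal, self-contained sketch of that indexing (all names here are illustrative, not libvpx's):

    #include <stdio.h>

    int main(void)
    {
        int mb_rows = 4, mb_cols = 6;
        int map[4 * 6];

        for (int mb_row = 0; mb_row < mb_rows; mb_row++)
        {
            int map_index = mb_row * mb_cols;  /* hoisted out of the column loop */

            for (int mb_col = 0; mb_col < mb_cols; mb_col++)
                map[map_index + mb_col] = mb_row * 100 + mb_col;
        }

        printf("%d\n", map[2 * mb_cols + 3]);  /* MB at row 2, col 3 -> 203 */
        return 0;
    }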
@@ -112,11 +112,12 @@

                 //printf("Thread mb_row = %d\n", mb_row);

+                // Set the mb activity pointer to the start of the row.
+                x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
                 // for each macroblock col in image
                 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
                 {
-                    int seg_map_index = (mb_row * cm->mb_cols);
-
                     if ((mb_col & (nsync - 1)) == 0)
                     {
                         while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1)
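The while loop above is the encoder's row synchronization: each worker spins until the thread encoding the row above has advanced at least nsync macroblocks past its own column, reading the neighbour's progress through a volatile pointer. A stripped-down sketch of the same wait, assuming a hypothetical row_progress array that each thread updates as it finishes a column:

    #define MAX_MB_ROWS 64

    /* Hypothetical per-row progress array: row_progress[r] is the last
     * mb_col the thread encoding row r has finished. */
    volatile int row_progress[MAX_MB_ROWS];

    static void wait_for_row_above(int mb_row, int mb_col,
                                   int nsync, int mb_cols)
    {
        volatile int *last_row_current_mb_col = &row_progress[mb_row - 1];

        /* Only poll every nsync columns to keep the check cheap;
         * nsync is a power of two, so the mask tests divisibility. */
        if ((mb_col & (nsync - 1)) == 0)
        {
            while (mb_col > (*last_row_current_mb_col - nsync) &&
                   *last_row_current_mb_col != mb_cols - 1)
                ;   /* spin until the row above is nsync MBs ahead */
        }
    }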
@@ -147,16 +148,19 @@
                     x->rddiv = cpi->RDDIV;
                     x->rdmult = cpi->RDMULT;

+                    // Copy the current mb to a buffer.
+                    RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+
                     if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-                        activity_sum += vp8_activity_masking(cpi, x);
+                        vp8_activity_masking(cpi, x);

                     // Is segmentation enabled
                     // MB level adjutment to quantizer
                     if (xd->segmentation_enabled)
                     {
                         // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
-                        if (cpi->segmentation_map[seg_map_index + mb_col] <= 3)
-                            xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index + mb_col];
+                        if (cpi->segmentation_map[map_index + mb_col] <= 3)
+                            xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col];
                         else
                             xd->mode_info_context->mbmi.segment_id = 0;

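The range check above guards the per-MB quantizer adjustment: VP8 has only four segments (ids 0..3), so any out-of-range value in segmentation_map falls back to segment 0. A minimal sketch of the same clamp, with illustrative names:

    /* VP8 allows 4 segments; map values outside 0..3 fall back to
     * segment 0, mirroring the range check in the hunk above. */
    static int clamp_segment_id(const unsigned char *seg_map,
                                int map_index, int mb_col)
    {
        unsigned char id = seg_map[map_index + mb_col];
        return (id <= 3) ? id : 0;
    }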
@@ -165,7 +169,7 @@
                     else
                         xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default

-                    x->active_ptr = cpi->active_map + seg_map_index + mb_col;
+                    x->active_ptr = cpi->active_map + map_index + mb_col;

                     if (cm->frame_type == KEY_FRAME)
                     {
@@ -203,29 +207,34 @@
                         if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
                         {
                             const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
-                            cpi->segmentation_map[seg_map_index + mb_col] = mbmi->segment_id;
+                            cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id;

                             // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
                             // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
                             // else mark it as dirty (1).
                             if (mbmi->segment_id)
-                                cpi->cyclic_refresh_map[seg_map_index + mb_col] = -1;
+                                cpi->cyclic_refresh_map[map_index + mb_col] = -1;
                             else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
                             {
-                                if (cpi->cyclic_refresh_map[seg_map_index + mb_col] == 1)
-                                    cpi->cyclic_refresh_map[seg_map_index + mb_col] = 0;
+                                if (cpi->cyclic_refresh_map[map_index + mb_col] == 1)
+                                    cpi->cyclic_refresh_map[map_index + mb_col] = 0;
                             }
                             else
-                                cpi->cyclic_refresh_map[seg_map_index + mb_col] = 1;
+                                cpi->cyclic_refresh_map[map_index + mb_col] = 1;

                         }
                     }
                     cpi->tplist[mb_row].stop = tp;

-                    x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb
+                    // Increment pointer into gf usage flags structure.
+                    x->gf_active_ptr++;

+                    // Increment the activity mask pointers.
+                    x->mb_activity_ptr++;
+
+                    /* save the block info */
                     for (i = 0; i < 16; i++)
-                        vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
+                        xd->mode_info_context->bmi[i] = xd->block[i].bmi;

                     // adjust to the next column of macroblocks
                     x->src.y_buffer += 16;
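The cyclic-refresh bookkeeping above marks each MB as clean (-1) when it was just refreshed, as a cleanup candidate (0) when it was a previously dirty last-frame ZEROMV block, and dirty (1) otherwise. A self-contained restatement, with the mode/ref-frame tests reduced to booleans and illustrative names:

    /* Restates the marking logic above; refreshed mirrors
     * mbmi->segment_id, zeromv_last mirrors the (ZEROMV, LAST_FRAME)
     * test. */
    static void mark_refresh(signed char *refresh_map, int i,
                             int refreshed, int zeromv_last)
    {
        if (refreshed)
            refresh_map[i] = -1;        /* just refreshed: clean */
        else if (zeromv_last)
        {
            if (refresh_map[i] == 1)    /* dirty and now static: */
                refresh_map[i] = 0;     /* candidate for next pass */
        }
        else
            refresh_map[i] = 1;         /* coded with motion: dirty */
    }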
@@ -256,7 +265,6 @@
                 // this is to account for the border
                 xd->mode_info_context++;
                 x->partition_info++;
-                x->activity_sum += activity_sum;

                 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                 x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -264,6 +272,7 @@

                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
                 x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
+                x->gf_active_ptr   += cm->mb_cols * cpi->encoding_thread_count;

                 if (mb_row == cm->mb_rows - 1)
                 {
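The added gf_active_ptr line fixes the end-of-row pointer advance: each thread encodes every (encoding_thread_count + 1)-th row, so after walking its own row a per-MB pointer must also skip the rows owned by the other threads, as mode_info_context and partition_info already do. A small sketch of the stride arithmetic (illustrative values):

    #include <stdio.h>

    int main(void)
    {
        int mb_cols = 8, encoding_thread_count = 3, mb_rows = 12;
        int offset = 0;   /* index of the first MB of this thread's current row */

        /* this thread owns rows 0, 4, 8, ... (stride = workers + main) */
        for (int mb_row = 0; mb_row < mb_rows;
             mb_row += encoding_thread_count + 1)
        {
            printf("row %2d starts at MB %d\n", mb_row, offset);
            offset += mb_cols;                          /* walk own row */
            offset += mb_cols * encoding_thread_count;  /* skip the others' rows */
        }
        return 0;
    }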
@@ -292,7 +301,6 @@

     z->sadperbit16      = x->sadperbit16;
     z->sadperbit4       = x->sadperbit4;
-    z->errthresh        = x->errthresh;

     /*
     z->mv_col_min    = x->mv_col_min;
@@ -306,6 +314,7 @@
     z->vp8_short_fdct8x4     = x->vp8_short_fdct8x4;
     z->short_walsh4x4    = x->short_walsh4x4;
     z->quantize_b        = x->quantize_b;
+    z->quantize_b_pair   = x->quantize_b_pair;
     z->optimize          = x->optimize;

     /*
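The hunk adds the new quantize_b_pair hook to the per-thread copy: every function pointer the row threads call must be duplicated into each worker's MACROBLOCK, or a worker dereferences an uninitialised pointer. A minimal illustration of the pattern; the types and signature here are invented for the sketch, not libvpx's:

    /* Per-thread contexts copy their dispatch pointers from the main
     * context, as the copy above does for quantize_b_pair. */
    typedef void (*quantize_fn)(short *coeffs, int count);

    struct mb_ctx {
        quantize_fn quantize_b;
        quantize_fn quantize_b_pair;
    };

    static void copy_dispatch(struct mb_ctx *z, const struct mb_ctx *x)
    {
        z->quantize_b      = x->quantize_b;
        z->quantize_b_pair = x->quantize_b_pair;   /* newly added hook */
    }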
@@ -319,8 +328,8 @@
     vpx_memcpy(z->mvcosts,          x->mvcosts,         sizeof(x->mvcosts));
     z->mvcost[0] = &z->mvcosts[0][mv_max+1];
     z->mvcost[1] = &z->mvcosts[1][mv_max+1];
-    z->mvsadcost[0] = &z->mvsadcosts[0][mv_max+1];
-    z->mvsadcost[1] = &z->mvsadcosts[1][mv_max+1];
+    z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1];
+    z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1];


     vpx_memcpy(z->token_costs,       x->token_costs,      sizeof(x->token_costs));
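This hunk re-centers mvsadcost on mvfp_max, the fullpel motion range the SAD cost tables are sized for, instead of the subpel mv_max; centering a table at the wrong offset makes signed indexing read out of bounds on one side. A sketch of the centering idiom, with an illustrative size:

    #define MVFP_MAX 255   /* illustrative fullpel motion range */

    int mvsadcosts[2][2 * MVFP_MAX + 2];
    int *mvsadcost[2];

    static void center_sad_cost_tables(void)
    {
        /* Point the working pointers at the zero-vector entry so they
         * can be indexed with signed components in -MVFP_MAX..MVFP_MAX. */
        mvsadcost[0] = &mvsadcosts[0][MVFP_MAX + 1];
        mvsadcost[1] = &mvsadcosts[1][MVFP_MAX + 1];
    }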
@@ -443,8 +452,6 @@

         vp8_setup_block_ptrs(mb);

-        mb->activity_sum = 0;
-
         mbd->left_context = &cm->left_context;
         mb->mvc = cm->fc.mvc;

@@ -459,15 +466,15 @@

     cpi->b_multi_threaded = 0;
     cpi->encoding_thread_count = 0;
-    cpi->processor_core_count = 32; //vp8_get_proc_core_count();

-    if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
+    if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
     {
         int ithread;
         int th_count = cpi->oxcf.multi_threaded - 1;

-        if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
-            th_count = cpi->processor_core_count - 1;
+        /* don't allocate more threads than cores available */
+        if (cpi->oxcf.multi_threaded > cm->processor_core_count)
+            th_count = cm->processor_core_count - 1;

         /* we have th_count + 1 (main) threads processing one row each */
         /* no point to have more threads than the sync range allows */

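The clamp above caps the worker count at cores - 1, now using the detected cm->processor_core_count instead of the old hard-coded 32; the main thread encodes rows too, so it takes one of the cores. A minimal restatement of the arithmetic (hypothetical names):

    /* Clamp encoder worker threads: the main thread also encodes rows,
     * so allocate at most (cores - 1) extra workers. */
    static int worker_thread_count(int requested, int cores)
    {
        int th_count = requested - 1;   /* the main thread counts as one */

        if (requested > cores)
            th_count = cores - 1;       /* don't exceed available cores */

        return th_count;
    }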