| OLD | NEW |
| 1 // Copyright 2010 Google Inc. All Rights Reserved. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
| 9 // | 9 // |
| 10 // Frame-reconstruction function. Memory allocation. | 10 // Frame-reconstruction function. Memory allocation. |
| 11 // | 11 // |
| 12 // Author: Skal (pascal.massimino@gmail.com) | 12 // Author: Skal (pascal.massimino@gmail.com) |
| 13 | 13 |
| 14 #include <stdlib.h> | 14 #include <stdlib.h> |
| 15 #include "./vp8i.h" | 15 #include "./vp8i.h" |
| 16 #include "../utils/utils.h" | 16 #include "../utils/utils.h" |
| 17 | 17 |
| 18 #define ALIGN_MASK (32 - 1) | 18 //------------------------------------------------------------------------------ |
| 19 // Main reconstruction function. |
| 20 |
| 21 static const int kScan[16] = { |
| 22 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
| 23 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
| 24 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
| 25 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS |
| 26 }; |
| 27 |
| 28 static int CheckMode(int mb_x, int mb_y, int mode) { |
| 29 if (mode == B_DC_PRED) { |
| 30 if (mb_x == 0) { |
| 31 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; |
| 32 } else { |
| 33 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; |
| 34 } |
| 35 } |
| 36 return mode; |
| 37 } |
| 38 |
// Copies exactly four bytes from 'src' to 'dst'. Going through memcpy() keeps
// the copy valid whatever the alignment of the two pointers. The regions must
// not overlap.
static void Copy32b(uint8_t* const dst, const uint8_t* const src) {
  memcpy(dst, src, 4);
}
| 42 |
| 43 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, |
| 44 uint8_t* const dst) { |
| 45 switch (bits >> 30) { |
| 46 case 3: |
| 47 VP8Transform(src, dst, 0); |
| 48 break; |
| 49 case 2: |
| 50 VP8TransformAC3(src, dst); |
| 51 break; |
| 52 case 1: |
| 53 VP8TransformDC(src, dst); |
| 54 break; |
| 55 default: |
| 56 break; |
| 57 } |
| 58 } |
| 59 |
// Applies the chroma transform for one plane (U or V). Only the low 8 bits of
// 'bits' are examined:
//   - none set                -> nothing is done,
//   - any bit of 0xaa set     -> VP8TransformUV(src, dst),
//   - otherwise               -> VP8TransformDCUV(src, dst).
static void DoUVTransform(uint32_t bits, const int16_t* const src,
                          uint8_t* const dst) {
  if (bits & 0xff) {    // any non-zero coeff at all?
    if (bits & 0xaa) {  // any non-zero AC coefficient?
      VP8TransformUV(src, dst);   // note we don't use the AC3 variant for U/V
    } else {
      VP8TransformDCUV(src, dst);
    }
  }
}
| 19 | 70 |
| 20 static void ReconstructRow(const VP8Decoder* const dec, | 71 static void ReconstructRow(const VP8Decoder* const dec, |
| 21 const VP8ThreadContext* ctx); // TODO(skal): remove | 72 const VP8ThreadContext* ctx) { |
| 73 int j; |
| 74 int mb_x; |
| 75 const int mb_y = ctx->mb_y_; |
| 76 const int cache_id = ctx->id_; |
| 77 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; |
| 78 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; |
| 79 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; |
| 80 |
| 81 // Initialize left-most block. |
| 82 for (j = 0; j < 16; ++j) { |
| 83 y_dst[j * BPS - 1] = 129; |
| 84 } |
| 85 for (j = 0; j < 8; ++j) { |
| 86 u_dst[j * BPS - 1] = 129; |
| 87 v_dst[j * BPS - 1] = 129; |
| 88 } |
| 89 |
| 90 // Init top-left sample on left column too. |
| 91 if (mb_y > 0) { |
| 92 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; |
| 93 } else { |
| 94 // we only need to do this init once at block (0,0). |
| 95 // Afterward, it remains valid for the whole topmost row. |
| 96 memset(y_dst - BPS - 1, 127, 16 + 4 + 1); |
| 97 memset(u_dst - BPS - 1, 127, 8 + 1); |
| 98 memset(v_dst - BPS - 1, 127, 8 + 1); |
| 99 } |
| 100 |
| 101 // Reconstruct one row. |
| 102 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { |
| 103 const VP8MBData* const block = ctx->mb_data_ + mb_x; |
| 104 |
| 105 // Rotate in the left samples from previously decoded block. We move four |
| 106 // pixels at a time for alignment reason, and because of in-loop filter. |
| 107 if (mb_x > 0) { |
| 108 for (j = -1; j < 16; ++j) { |
| 109 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); |
| 110 } |
| 111 for (j = -1; j < 8; ++j) { |
| 112 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); |
| 113 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); |
| 114 } |
| 115 } |
| 116 { |
| 117 // bring top samples into the cache |
| 118 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; |
| 119 const int16_t* const coeffs = block->coeffs_; |
| 120 uint32_t bits = block->non_zero_y_; |
| 121 int n; |
| 122 |
| 123 if (mb_y > 0) { |
| 124 memcpy(y_dst - BPS, top_yuv[0].y, 16); |
| 125 memcpy(u_dst - BPS, top_yuv[0].u, 8); |
| 126 memcpy(v_dst - BPS, top_yuv[0].v, 8); |
| 127 } |
| 128 |
| 129 // predict and add residuals |
| 130 if (block->is_i4x4_) { // 4x4 |
| 131 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); |
| 132 |
| 133 if (mb_y > 0) { |
| 134 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border |
| 135 memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); |
| 136 } else { |
| 137 memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); |
| 138 } |
| 139 } |
| 140 // replicate the top-right pixels below |
| 141 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; |
| 142 |
| 143 // predict and add residuals for all 4x4 blocks in turn. |
| 144 for (n = 0; n < 16; ++n, bits <<= 2) { |
| 145 uint8_t* const dst = y_dst + kScan[n]; |
| 146 VP8PredLuma4[block->imodes_[n]](dst); |
| 147 DoTransform(bits, coeffs + n * 16, dst); |
| 148 } |
| 149 } else { // 16x16 |
| 150 const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]); |
| 151 VP8PredLuma16[pred_func](y_dst); |
| 152 if (bits != 0) { |
| 153 for (n = 0; n < 16; ++n, bits <<= 2) { |
| 154 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); |
| 155 } |
| 156 } |
| 157 } |
| 158 { |
| 159 // Chroma |
| 160 const uint32_t bits_uv = block->non_zero_uv_; |
| 161 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); |
| 162 VP8PredChroma8[pred_func](u_dst); |
| 163 VP8PredChroma8[pred_func](v_dst); |
| 164 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); |
| 165 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); |
| 166 } |
| 167 |
| 168 // stash away top samples for next block |
| 169 if (mb_y < dec->mb_h_ - 1) { |
| 170 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); |
| 171 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); |
| 172 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); |
| 173 } |
| 174 } |
| 175 // Transfer reconstructed samples from yuv_b_ cache to final destination. |
| 176 { |
| 177 const int y_offset = cache_id * 16 * dec->cache_y_stride_; |
| 178 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; |
| 179 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; |
| 180 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; |
| 181 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; |
| 182 for (j = 0; j < 16; ++j) { |
| 183 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); |
| 184 } |
| 185 for (j = 0; j < 8; ++j) { |
| 186 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); |
| 187 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); |
| 188 } |
| 189 } |
| 190 } |
| 191 } |
| 22 | 192 |
| 23 //------------------------------------------------------------------------------ | 193 //------------------------------------------------------------------------------ |
| 24 // Filtering | 194 // Filtering |
| 25 | 195 |
// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
// for caching, given a filtering level.
// Simple filter:  up to 2 luma samples are read and 1 is written.
// Complex filter: up to 4 luma samples are read and 3 are written. Same for
//                 U/V, so it's 8 samples total (because of the 2x upsampling).
// The table is indexed by the decoder's filter type (0, 1 or 2).
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 105 if (!dec->segment_hdr_.absolute_delta_) { | 275 if (!dec->segment_hdr_.absolute_delta_) { |
| 106 base_level += hdr->level_; | 276 base_level += hdr->level_; |
| 107 } | 277 } |
| 108 } else { | 278 } else { |
| 109 base_level = hdr->level_; | 279 base_level = hdr->level_; |
| 110 } | 280 } |
| 111 for (i4x4 = 0; i4x4 <= 1; ++i4x4) { | 281 for (i4x4 = 0; i4x4 <= 1; ++i4x4) { |
| 112 VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; | 282 VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; |
| 113 int level = base_level; | 283 int level = base_level; |
| 114 if (hdr->use_lf_delta_) { | 284 if (hdr->use_lf_delta_) { |
| 115 // TODO(skal): only CURRENT is handled for now. | |
| 116 level += hdr->ref_lf_delta_[0]; | 285 level += hdr->ref_lf_delta_[0]; |
| 117 if (i4x4) { | 286 if (i4x4) { |
| 118 level += hdr->mode_lf_delta_[0]; | 287 level += hdr->mode_lf_delta_[0]; |
| 119 } | 288 } |
| 120 } | 289 } |
| 121 level = (level < 0) ? 0 : (level > 63) ? 63 : level; | 290 level = (level < 0) ? 0 : (level > 63) ? 63 : level; |
| 122 if (level > 0) { | 291 if (level > 0) { |
| 123 int ilevel = level; | 292 int ilevel = level; |
| 124 if (hdr->sharpness_ > 0) { | 293 if (hdr->sharpness_ > 0) { |
| 125 if (hdr->sharpness_ > 4) { | 294 if (hdr->sharpness_ > 4) { |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 170 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; | 339 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; |
| 171 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; | 340 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; |
| 172 } | 341 } |
| 173 all_amp |= dqm->dither_; | 342 all_amp |= dqm->dither_; |
| 174 } | 343 } |
| 175 if (all_amp != 0) { | 344 if (all_amp != 0) { |
| 176 VP8InitRandom(&dec->dithering_rg_, 1.0f); | 345 VP8InitRandom(&dec->dithering_rg_, 1.0f); |
| 177 dec->dither_ = 1; | 346 dec->dither_ = 1; |
| 178 } | 347 } |
| 179 } | 348 } |
| 180 #if WEBP_DECODER_ABI_VERSION > 0x0204 | |
| 181 // potentially allow alpha dithering | 349 // potentially allow alpha dithering |
| 182 dec->alpha_dithering_ = options->alpha_dithering_strength; | 350 dec->alpha_dithering_ = options->alpha_dithering_strength; |
| 183 if (dec->alpha_dithering_ > 100) { | 351 if (dec->alpha_dithering_ > 100) { |
| 184 dec->alpha_dithering_ = 100; | 352 dec->alpha_dithering_ = 100; |
| 185 } else if (dec->alpha_dithering_ < 0) { | 353 } else if (dec->alpha_dithering_ < 0) { |
| 186 dec->alpha_dithering_ = 0; | 354 dec->alpha_dithering_ = 0; |
| 187 } | 355 } |
| 188 #endif | |
| 189 } | 356 } |
| 190 } | 357 } |
| 191 | 358 |
// minimal amp that will provide a non-zero dithering effect
#define MIN_DITHER_AMP 4
// Dithering constants: DITHER_DESCALE_ROUNDER is half of (1 << DITHER_DESCALE)
// and DITHER_AMP_CENTER is (1 << DITHER_AMP_BITS).
#define DITHER_DESCALE 4
#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
#define DITHER_AMP_BITS 8
#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
| 198 | 365 |
| (...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 547 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_); | 714 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_); |
| 548 const size_t cache_height = (16 * num_caches | 715 const size_t cache_height = (16 * num_caches |
| 549 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; | 716 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; |
| 550 const size_t cache_size = top_size * cache_height; | 717 const size_t cache_size = top_size * cache_height; |
| 551 // alpha_size is the only one that scales as width x height. | 718 // alpha_size is the only one that scales as width x height. |
| 552 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ? | 719 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ? |
| 553 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; | 720 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; |
| 554 const uint64_t needed = (uint64_t)intra_pred_mode_size | 721 const uint64_t needed = (uint64_t)intra_pred_mode_size |
| 555 + top_size + mb_info_size + f_info_size | 722 + top_size + mb_info_size + f_info_size |
| 556 + yuv_size + mb_data_size | 723 + yuv_size + mb_data_size |
| 557 + cache_size + alpha_size + ALIGN_MASK; | 724 + cache_size + alpha_size + WEBP_ALIGN_CST; |
| 558 uint8_t* mem; | 725 uint8_t* mem; |
| 559 | 726 |
| 560 if (needed != (size_t)needed) return 0; // check for overflow | 727 if (needed != (size_t)needed) return 0; // check for overflow |
| 561 if (needed > dec->mem_size_) { | 728 if (needed > dec->mem_size_) { |
| 562 WebPSafeFree(dec->mem_); | 729 WebPSafeFree(dec->mem_); |
| 563 dec->mem_size_ = 0; | 730 dec->mem_size_ = 0; |
| 564 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t)); | 731 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t)); |
| 565 if (dec->mem_ == NULL) { | 732 if (dec->mem_ == NULL) { |
| 566 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, | 733 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, |
| 567 "no memory during frame initialization."); | 734 "no memory during frame initialization."); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 584 mem += f_info_size; | 751 mem += f_info_size; |
| 585 dec->thread_ctx_.id_ = 0; | 752 dec->thread_ctx_.id_ = 0; |
| 586 dec->thread_ctx_.f_info_ = dec->f_info_; | 753 dec->thread_ctx_.f_info_ = dec->f_info_; |
| 587 if (dec->mt_method_ > 0) { | 754 if (dec->mt_method_ > 0) { |
| 588 // secondary cache line. The deblocking process need to make use of the | 755 // secondary cache line. The deblocking process need to make use of the |
| 589 // filtering strength from previous macroblock row, while the new ones | 756 // filtering strength from previous macroblock row, while the new ones |
| 590 // are being decoded in parallel. We'll just swap the pointers. | 757 // are being decoded in parallel. We'll just swap the pointers. |
| 591 dec->thread_ctx_.f_info_ += mb_w; | 758 dec->thread_ctx_.f_info_ += mb_w; |
| 592 } | 759 } |
| 593 | 760 |
| 594 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); | 761 mem = (uint8_t*)WEBP_ALIGN(mem); |
| 595 assert((yuv_size & ALIGN_MASK) == 0); | 762 assert((yuv_size & WEBP_ALIGN_CST) == 0); |
| 596 dec->yuv_b_ = (uint8_t*)mem; | 763 dec->yuv_b_ = (uint8_t*)mem; |
| 597 mem += yuv_size; | 764 mem += yuv_size; |
| 598 | 765 |
| 599 dec->mb_data_ = (VP8MBData*)mem; | 766 dec->mb_data_ = (VP8MBData*)mem; |
| 600 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; | 767 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; |
| 601 if (dec->mt_method_ == 2) { | 768 if (dec->mt_method_ == 2) { |
| 602 dec->thread_ctx_.mb_data_ += mb_w; | 769 dec->thread_ctx_.mb_data_ += mb_w; |
| 603 } | 770 } |
| 604 mem += mb_data_size; | 771 mem += mb_data_size; |
| 605 | 772 |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 637 // prepare 'io' | 804 // prepare 'io' |
| 638 io->mb_y = 0; | 805 io->mb_y = 0; |
| 639 io->y = dec->cache_y_; | 806 io->y = dec->cache_y_; |
| 640 io->u = dec->cache_u_; | 807 io->u = dec->cache_u_; |
| 641 io->v = dec->cache_v_; | 808 io->v = dec->cache_v_; |
| 642 io->y_stride = dec->cache_y_stride_; | 809 io->y_stride = dec->cache_y_stride_; |
| 643 io->uv_stride = dec->cache_uv_stride_; | 810 io->uv_stride = dec->cache_uv_stride_; |
| 644 io->a = NULL; | 811 io->a = NULL; |
| 645 } | 812 } |
| 646 | 813 |
| 647 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { | 814 int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) { |
| 648 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. | 815 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. |
| 649 if (!AllocateMemory(dec)) return 0; | 816 if (!AllocateMemory(dec)) return 0; |
| 650 InitIo(dec, io); | 817 InitIo(dec, io); |
| 651 VP8DspInit(); // Init critical function pointers and look-up tables. | 818 VP8DspInit(); // Init critical function pointers and look-up tables. |
| 652 return 1; | 819 return 1; |
| 653 } | 820 } |
| 654 | 821 |
| 655 //------------------------------------------------------------------------------ | 822 //------------------------------------------------------------------------------ |
| 656 // Main reconstruction function. | |
| 657 | |
| 658 static const int kScan[16] = { | |
| 659 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, | |
| 660 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, | |
| 661 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, | |
| 662 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS | |
| 663 }; | |
| 664 | |
| 665 static int CheckMode(int mb_x, int mb_y, int mode) { | |
| 666 if (mode == B_DC_PRED) { | |
| 667 if (mb_x == 0) { | |
| 668 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; | |
| 669 } else { | |
| 670 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; | |
| 671 } | |
| 672 } | |
| 673 return mode; | |
| 674 } | |
| 675 | |
| 676 static void Copy32b(uint8_t* dst, uint8_t* src) { | |
| 677 memcpy(dst, src, 4); | |
| 678 } | |
| 679 | |
| 680 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, | |
| 681 uint8_t* const dst) { | |
| 682 switch (bits >> 30) { | |
| 683 case 3: | |
| 684 VP8Transform(src, dst, 0); | |
| 685 break; | |
| 686 case 2: | |
| 687 VP8TransformAC3(src, dst); | |
| 688 break; | |
| 689 case 1: | |
| 690 VP8TransformDC(src, dst); | |
| 691 break; | |
| 692 default: | |
| 693 break; | |
| 694 } | |
| 695 } | |
| 696 | |
| 697 static void DoUVTransform(uint32_t bits, const int16_t* const src, | |
| 698 uint8_t* const dst) { | |
| 699 if (bits & 0xff) { // any non-zero coeff at all? | |
| 700 if (bits & 0xaa) { // any non-zero AC coefficient? | |
| 701 VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V | |
| 702 } else { | |
| 703 VP8TransformDCUV(src, dst); | |
| 704 } | |
| 705 } | |
| 706 } | |
| 707 | |
| 708 static void ReconstructRow(const VP8Decoder* const dec, | |
| 709 const VP8ThreadContext* ctx) { | |
| 710 int j; | |
| 711 int mb_x; | |
| 712 const int mb_y = ctx->mb_y_; | |
| 713 const int cache_id = ctx->id_; | |
| 714 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; | |
| 715 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; | |
| 716 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; | |
| 717 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { | |
| 718 const VP8MBData* const block = ctx->mb_data_ + mb_x; | |
| 719 | |
| 720 // Rotate in the left samples from previously decoded block. We move four | |
| 721 // pixels at a time for alignment reason, and because of in-loop filter. | |
| 722 if (mb_x > 0) { | |
| 723 for (j = -1; j < 16; ++j) { | |
| 724 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); | |
| 725 } | |
| 726 for (j = -1; j < 8; ++j) { | |
| 727 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); | |
| 728 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); | |
| 729 } | |
| 730 } else { | |
| 731 for (j = 0; j < 16; ++j) { | |
| 732 y_dst[j * BPS - 1] = 129; | |
| 733 } | |
| 734 for (j = 0; j < 8; ++j) { | |
| 735 u_dst[j * BPS - 1] = 129; | |
| 736 v_dst[j * BPS - 1] = 129; | |
| 737 } | |
| 738 // Init top-left sample on left column too | |
| 739 if (mb_y > 0) { | |
| 740 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; | |
| 741 } | |
| 742 } | |
| 743 { | |
| 744 // bring top samples into the cache | |
| 745 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; | |
| 746 const int16_t* const coeffs = block->coeffs_; | |
| 747 uint32_t bits = block->non_zero_y_; | |
| 748 int n; | |
| 749 | |
| 750 if (mb_y > 0) { | |
| 751 memcpy(y_dst - BPS, top_yuv[0].y, 16); | |
| 752 memcpy(u_dst - BPS, top_yuv[0].u, 8); | |
| 753 memcpy(v_dst - BPS, top_yuv[0].v, 8); | |
| 754 } else if (mb_x == 0) { | |
| 755 // we only need to do this init once at block (0,0). | |
| 756 // Afterward, it remains valid for the whole topmost row. | |
| 757 memset(y_dst - BPS - 1, 127, 16 + 4 + 1); | |
| 758 memset(u_dst - BPS - 1, 127, 8 + 1); | |
| 759 memset(v_dst - BPS - 1, 127, 8 + 1); | |
| 760 } | |
| 761 | |
| 762 // predict and add residuals | |
| 763 if (block->is_i4x4_) { // 4x4 | |
| 764 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); | |
| 765 | |
| 766 if (mb_y > 0) { | |
| 767 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border | |
| 768 memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); | |
| 769 } else { | |
| 770 memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); | |
| 771 } | |
| 772 } | |
| 773 // replicate the top-right pixels below | |
| 774 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; | |
| 775 | |
| 776 // predict and add residuals for all 4x4 blocks in turn. | |
| 777 for (n = 0; n < 16; ++n, bits <<= 2) { | |
| 778 uint8_t* const dst = y_dst + kScan[n]; | |
| 779 VP8PredLuma4[block->imodes_[n]](dst); | |
| 780 DoTransform(bits, coeffs + n * 16, dst); | |
| 781 } | |
| 782 } else { // 16x16 | |
| 783 const int pred_func = CheckMode(mb_x, mb_y, | |
| 784 block->imodes_[0]); | |
| 785 VP8PredLuma16[pred_func](y_dst); | |
| 786 if (bits != 0) { | |
| 787 for (n = 0; n < 16; ++n, bits <<= 2) { | |
| 788 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); | |
| 789 } | |
| 790 } | |
| 791 } | |
| 792 { | |
| 793 // Chroma | |
| 794 const uint32_t bits_uv = block->non_zero_uv_; | |
| 795 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); | |
| 796 VP8PredChroma8[pred_func](u_dst); | |
| 797 VP8PredChroma8[pred_func](v_dst); | |
| 798 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); | |
| 799 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); | |
| 800 } | |
| 801 | |
| 802 // stash away top samples for next block | |
| 803 if (mb_y < dec->mb_h_ - 1) { | |
| 804 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); | |
| 805 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); | |
| 806 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); | |
| 807 } | |
| 808 } | |
| 809 // Transfer reconstructed samples from yuv_b_ cache to final destination. | |
| 810 { | |
| 811 const int y_offset = cache_id * 16 * dec->cache_y_stride_; | |
| 812 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; | |
| 813 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; | |
| 814 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; | |
| 815 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; | |
| 816 for (j = 0; j < 16; ++j) { | |
| 817 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); | |
| 818 } | |
| 819 for (j = 0; j < 8; ++j) { | |
| 820 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); | |
| 821 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); | |
| 822 } | |
| 823 } | |
| 824 } | |
| 825 } | |
| 826 | |
| 827 //------------------------------------------------------------------------------ | |
| 828 | |
| OLD | NEW |