OLD | NEW |
1 // Copyright 2010 Google Inc. All Rights Reserved. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Frame-reconstruction function. Memory allocation. | 10 // Frame-reconstruction function. Memory allocation. |
11 // | 11 // |
12 // Author: Skal (pascal.massimino@gmail.com) | 12 // Author: Skal (pascal.massimino@gmail.com) |
13 | 13 |
14 #include <stdlib.h> | 14 #include <stdlib.h> |
15 #include "./vp8i.h" | 15 #include "./vp8i.h" |
16 #include "../utils/utils.h" | 16 #include "../utils/utils.h" |
17 | 17 |
18 #define ALIGN_MASK (32 - 1) | 18 //------------------------------------------------------------------------------ |
| 19 // Main reconstruction function. |
| 20 |
| 21 static const int kScan[16] = { |
| 22 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
| 23 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
| 24 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
| 25 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS |
| 26 }; |
| 27 |
| 28 static int CheckMode(int mb_x, int mb_y, int mode) { |
| 29 if (mode == B_DC_PRED) { |
| 30 if (mb_x == 0) { |
| 31 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; |
| 32 } else { |
| 33 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; |
| 34 } |
| 35 } |
| 36 return mode; |
| 37 } |
| 38 |
| 39 static void Copy32b(uint8_t* const dst, const uint8_t* const src) { |
| 40 memcpy(dst, src, 4); |
| 41 } |
| 42 |
| 43 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, |
| 44 uint8_t* const dst) { |
| 45 switch (bits >> 30) { |
| 46 case 3: |
| 47 VP8Transform(src, dst, 0); |
| 48 break; |
| 49 case 2: |
| 50 VP8TransformAC3(src, dst); |
| 51 break; |
| 52 case 1: |
| 53 VP8TransformDC(src, dst); |
| 54 break; |
| 55 default: |
| 56 break; |
| 57 } |
| 58 } |
| 59 |
| 60 static void DoUVTransform(uint32_t bits, const int16_t* const src, |
| 61 uint8_t* const dst) { |
| 62 if (bits & 0xff) { // any non-zero coeff at all? |
| 63 if (bits & 0xaa) { // any non-zero AC coefficient? |
| 64 VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V |
| 65 } else { |
| 66 VP8TransformDCUV(src, dst); |
| 67 } |
| 68 } |
| 69 } |
19 | 70 |
20 static void ReconstructRow(const VP8Decoder* const dec, | 71 static void ReconstructRow(const VP8Decoder* const dec, |
21 const VP8ThreadContext* ctx); // TODO(skal): remove | 72 const VP8ThreadContext* ctx) { |
| 73 int j; |
| 74 int mb_x; |
| 75 const int mb_y = ctx->mb_y_; |
| 76 const int cache_id = ctx->id_; |
| 77 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; |
| 78 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; |
| 79 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; |
| 80 |
| 81 // Initialize left-most block. |
| 82 for (j = 0; j < 16; ++j) { |
| 83 y_dst[j * BPS - 1] = 129; |
| 84 } |
| 85 for (j = 0; j < 8; ++j) { |
| 86 u_dst[j * BPS - 1] = 129; |
| 87 v_dst[j * BPS - 1] = 129; |
| 88 } |
| 89 |
| 90 // Init top-left sample on left column too. |
| 91 if (mb_y > 0) { |
| 92 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; |
| 93 } else { |
| 94 // we only need to do this init once at block (0,0). |
| 95 // Afterward, it remains valid for the whole topmost row. |
| 96 memset(y_dst - BPS - 1, 127, 16 + 4 + 1); |
| 97 memset(u_dst - BPS - 1, 127, 8 + 1); |
| 98 memset(v_dst - BPS - 1, 127, 8 + 1); |
| 99 } |
| 100 |
| 101 // Reconstruct one row. |
| 102 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { |
| 103 const VP8MBData* const block = ctx->mb_data_ + mb_x; |
| 104 |
| 105 // Rotate in the left samples from previously decoded block. We move four |
| 106 // pixels at a time for alignment reasons, and because of the in-loop filter. |
| 107 if (mb_x > 0) { |
| 108 for (j = -1; j < 16; ++j) { |
| 109 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); |
| 110 } |
| 111 for (j = -1; j < 8; ++j) { |
| 112 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); |
| 113 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); |
| 114 } |
| 115 } |
| 116 { |
| 117 // bring top samples into the cache |
| 118 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; |
| 119 const int16_t* const coeffs = block->coeffs_; |
| 120 uint32_t bits = block->non_zero_y_; |
| 121 int n; |
| 122 |
| 123 if (mb_y > 0) { |
| 124 memcpy(y_dst - BPS, top_yuv[0].y, 16); |
| 125 memcpy(u_dst - BPS, top_yuv[0].u, 8); |
| 126 memcpy(v_dst - BPS, top_yuv[0].v, 8); |
| 127 } |
| 128 |
| 129 // predict and add residuals |
| 130 if (block->is_i4x4_) { // 4x4 |
| 131 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); |
| 132 |
| 133 if (mb_y > 0) { |
| 134 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border |
| 135 memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); |
| 136 } else { |
| 137 memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); |
| 138 } |
| 139 } |
| 140 // replicate the top-right pixels below |
| 141 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; |
| 142 |
| 143 // predict and add residuals for all 4x4 blocks in turn. |
| 144 for (n = 0; n < 16; ++n, bits <<= 2) { |
| 145 uint8_t* const dst = y_dst + kScan[n]; |
| 146 VP8PredLuma4[block->imodes_[n]](dst); |
| 147 DoTransform(bits, coeffs + n * 16, dst); |
| 148 } |
| 149 } else { // 16x16 |
| 150 const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]); |
| 151 VP8PredLuma16[pred_func](y_dst); |
| 152 if (bits != 0) { |
| 153 for (n = 0; n < 16; ++n, bits <<= 2) { |
| 154 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); |
| 155 } |
| 156 } |
| 157 } |
| 158 { |
| 159 // Chroma |
| 160 const uint32_t bits_uv = block->non_zero_uv_; |
| 161 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); |
| 162 VP8PredChroma8[pred_func](u_dst); |
| 163 VP8PredChroma8[pred_func](v_dst); |
| 164 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); |
| 165 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); |
| 166 } |
| 167 |
| 168 // stash away top samples for next block |
| 169 if (mb_y < dec->mb_h_ - 1) { |
| 170 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); |
| 171 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); |
| 172 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); |
| 173 } |
| 174 } |
| 175 // Transfer reconstructed samples from yuv_b_ cache to final destination. |
| 176 { |
| 177 const int y_offset = cache_id * 16 * dec->cache_y_stride_; |
| 178 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; |
| 179 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; |
| 180 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; |
| 181 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; |
| 182 for (j = 0; j < 16; ++j) { |
| 183 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); |
| 184 } |
| 185 for (j = 0; j < 8; ++j) { |
| 186 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); |
| 187 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); |
| 188 } |
| 189 } |
| 190 } |
| 191 } |
22 | 192 |
23 //------------------------------------------------------------------------------ | 193 //------------------------------------------------------------------------------ |
24 // Filtering | 194 // Filtering |
25 | 195 |
26 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary | 196 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary |
27 // for caching, given a filtering level. | 197 // for caching, given a filtering level. |
28 // Simple filter: up to 2 luma samples are read and 1 is written. | 198 // Simple filter: up to 2 luma samples are read and 1 is written. |
29 // Complex filter: up to 4 luma samples are read and 3 are written. Same for | 199 // Complex filter: up to 4 luma samples are read and 3 are written. Same for |
30 // U/V, so it's 8 samples total (because of the 2x upsampling). | 200 // U/V, so it's 8 samples total (because of the 2x upsampling). |
31 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; | 201 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
105 if (!dec->segment_hdr_.absolute_delta_) { | 275 if (!dec->segment_hdr_.absolute_delta_) { |
106 base_level += hdr->level_; | 276 base_level += hdr->level_; |
107 } | 277 } |
108 } else { | 278 } else { |
109 base_level = hdr->level_; | 279 base_level = hdr->level_; |
110 } | 280 } |
111 for (i4x4 = 0; i4x4 <= 1; ++i4x4) { | 281 for (i4x4 = 0; i4x4 <= 1; ++i4x4) { |
112 VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; | 282 VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; |
113 int level = base_level; | 283 int level = base_level; |
114 if (hdr->use_lf_delta_) { | 284 if (hdr->use_lf_delta_) { |
115 // TODO(skal): only CURRENT is handled for now. | |
116 level += hdr->ref_lf_delta_[0]; | 285 level += hdr->ref_lf_delta_[0]; |
117 if (i4x4) { | 286 if (i4x4) { |
118 level += hdr->mode_lf_delta_[0]; | 287 level += hdr->mode_lf_delta_[0]; |
119 } | 288 } |
120 } | 289 } |
121 level = (level < 0) ? 0 : (level > 63) ? 63 : level; | 290 level = (level < 0) ? 0 : (level > 63) ? 63 : level; |
122 if (level > 0) { | 291 if (level > 0) { |
123 int ilevel = level; | 292 int ilevel = level; |
124 if (hdr->sharpness_ > 0) { | 293 if (hdr->sharpness_ > 0) { |
125 if (hdr->sharpness_ > 4) { | 294 if (hdr->sharpness_ > 4) { |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
170 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; | 339 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; |
171 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; | 340 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; |
172 } | 341 } |
173 all_amp |= dqm->dither_; | 342 all_amp |= dqm->dither_; |
174 } | 343 } |
175 if (all_amp != 0) { | 344 if (all_amp != 0) { |
176 VP8InitRandom(&dec->dithering_rg_, 1.0f); | 345 VP8InitRandom(&dec->dithering_rg_, 1.0f); |
177 dec->dither_ = 1; | 346 dec->dither_ = 1; |
178 } | 347 } |
179 } | 348 } |
180 #if WEBP_DECODER_ABI_VERSION > 0x0204 | |
181 // potentially allow alpha dithering | 349 // potentially allow alpha dithering |
182 dec->alpha_dithering_ = options->alpha_dithering_strength; | 350 dec->alpha_dithering_ = options->alpha_dithering_strength; |
183 if (dec->alpha_dithering_ > 100) { | 351 if (dec->alpha_dithering_ > 100) { |
184 dec->alpha_dithering_ = 100; | 352 dec->alpha_dithering_ = 100; |
185 } else if (dec->alpha_dithering_ < 0) { | 353 } else if (dec->alpha_dithering_ < 0) { |
186 dec->alpha_dithering_ = 0; | 354 dec->alpha_dithering_ = 0; |
187 } | 355 } |
188 #endif | |
189 } | 356 } |
190 } | 357 } |
191 | 358 |
192 // minimal amp that will provide a non-zero dithering effect | 359 // minimal amp that will provide a non-zero dithering effect |
193 #define MIN_DITHER_AMP 4 | 360 #define MIN_DITHER_AMP 4 |
194 #define DITHER_DESCALE 4 | 361 #define DITHER_DESCALE 4 |
195 #define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) | 362 #define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) |
196 #define DITHER_AMP_BITS 8 | 363 #define DITHER_AMP_BITS 8 |
197 #define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) | 364 #define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) |
198 | 365 |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
547 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_); | 714 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_); |
548 const size_t cache_height = (16 * num_caches | 715 const size_t cache_height = (16 * num_caches |
549 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; | 716 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; |
550 const size_t cache_size = top_size * cache_height; | 717 const size_t cache_size = top_size * cache_height; |
551 // alpha_size is the only one that scales as width x height. | 718 // alpha_size is the only one that scales as width x height. |
552 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ? | 719 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ? |
553 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; | 720 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; |
554 const uint64_t needed = (uint64_t)intra_pred_mode_size | 721 const uint64_t needed = (uint64_t)intra_pred_mode_size |
555 + top_size + mb_info_size + f_info_size | 722 + top_size + mb_info_size + f_info_size |
556 + yuv_size + mb_data_size | 723 + yuv_size + mb_data_size |
557 + cache_size + alpha_size + ALIGN_MASK; | 724 + cache_size + alpha_size + WEBP_ALIGN_CST; |
558 uint8_t* mem; | 725 uint8_t* mem; |
559 | 726 |
560 if (needed != (size_t)needed) return 0; // check for overflow | 727 if (needed != (size_t)needed) return 0; // check for overflow |
561 if (needed > dec->mem_size_) { | 728 if (needed > dec->mem_size_) { |
562 WebPSafeFree(dec->mem_); | 729 WebPSafeFree(dec->mem_); |
563 dec->mem_size_ = 0; | 730 dec->mem_size_ = 0; |
564 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t)); | 731 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t)); |
565 if (dec->mem_ == NULL) { | 732 if (dec->mem_ == NULL) { |
566 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, | 733 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, |
567 "no memory during frame initialization."); | 734 "no memory during frame initialization."); |
(...skipping 16 matching lines...) Expand all Loading... |
584 mem += f_info_size; | 751 mem += f_info_size; |
585 dec->thread_ctx_.id_ = 0; | 752 dec->thread_ctx_.id_ = 0; |
586 dec->thread_ctx_.f_info_ = dec->f_info_; | 753 dec->thread_ctx_.f_info_ = dec->f_info_; |
587 if (dec->mt_method_ > 0) { | 754 if (dec->mt_method_ > 0) { |
588 // secondary cache line. The deblocking process needs to make use of the | 755 // secondary cache line. The deblocking process needs to make use of the |
589 // filtering strength from previous macroblock row, while the new ones | 756 // filtering strength from previous macroblock row, while the new ones |
590 // are being decoded in parallel. We'll just swap the pointers. | 757 // are being decoded in parallel. We'll just swap the pointers. |
591 dec->thread_ctx_.f_info_ += mb_w; | 758 dec->thread_ctx_.f_info_ += mb_w; |
592 } | 759 } |
593 | 760 |
594 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); | 761 mem = (uint8_t*)WEBP_ALIGN(mem); |
595 assert((yuv_size & ALIGN_MASK) == 0); | 762 assert((yuv_size & WEBP_ALIGN_CST) == 0); |
596 dec->yuv_b_ = (uint8_t*)mem; | 763 dec->yuv_b_ = (uint8_t*)mem; |
597 mem += yuv_size; | 764 mem += yuv_size; |
598 | 765 |
599 dec->mb_data_ = (VP8MBData*)mem; | 766 dec->mb_data_ = (VP8MBData*)mem; |
600 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; | 767 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; |
601 if (dec->mt_method_ == 2) { | 768 if (dec->mt_method_ == 2) { |
602 dec->thread_ctx_.mb_data_ += mb_w; | 769 dec->thread_ctx_.mb_data_ += mb_w; |
603 } | 770 } |
604 mem += mb_data_size; | 771 mem += mb_data_size; |
605 | 772 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
637 // prepare 'io' | 804 // prepare 'io' |
638 io->mb_y = 0; | 805 io->mb_y = 0; |
639 io->y = dec->cache_y_; | 806 io->y = dec->cache_y_; |
640 io->u = dec->cache_u_; | 807 io->u = dec->cache_u_; |
641 io->v = dec->cache_v_; | 808 io->v = dec->cache_v_; |
642 io->y_stride = dec->cache_y_stride_; | 809 io->y_stride = dec->cache_y_stride_; |
643 io->uv_stride = dec->cache_uv_stride_; | 810 io->uv_stride = dec->cache_uv_stride_; |
644 io->a = NULL; | 811 io->a = NULL; |
645 } | 812 } |
646 | 813 |
647 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { | 814 int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) { |
648 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. | 815 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. |
649 if (!AllocateMemory(dec)) return 0; | 816 if (!AllocateMemory(dec)) return 0; |
650 InitIo(dec, io); | 817 InitIo(dec, io); |
651 VP8DspInit(); // Init critical function pointers and look-up tables. | 818 VP8DspInit(); // Init critical function pointers and look-up tables. |
652 return 1; | 819 return 1; |
653 } | 820 } |
654 | 821 |
655 //------------------------------------------------------------------------------ | 822 //------------------------------------------------------------------------------ |
656 // Main reconstruction function. | |
657 | |
658 static const int kScan[16] = { | |
659 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, | |
660 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, | |
661 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, | |
662 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS | |
663 }; | |
664 | |
665 static int CheckMode(int mb_x, int mb_y, int mode) { | |
666 if (mode == B_DC_PRED) { | |
667 if (mb_x == 0) { | |
668 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; | |
669 } else { | |
670 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; | |
671 } | |
672 } | |
673 return mode; | |
674 } | |
675 | |
676 static void Copy32b(uint8_t* dst, uint8_t* src) { | |
677 memcpy(dst, src, 4); | |
678 } | |
679 | |
680 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, | |
681 uint8_t* const dst) { | |
682 switch (bits >> 30) { | |
683 case 3: | |
684 VP8Transform(src, dst, 0); | |
685 break; | |
686 case 2: | |
687 VP8TransformAC3(src, dst); | |
688 break; | |
689 case 1: | |
690 VP8TransformDC(src, dst); | |
691 break; | |
692 default: | |
693 break; | |
694 } | |
695 } | |
696 | |
697 static void DoUVTransform(uint32_t bits, const int16_t* const src, | |
698 uint8_t* const dst) { | |
699 if (bits & 0xff) { // any non-zero coeff at all? | |
700 if (bits & 0xaa) { // any non-zero AC coefficient? | |
701 VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V | |
702 } else { | |
703 VP8TransformDCUV(src, dst); | |
704 } | |
705 } | |
706 } | |
707 | |
708 static void ReconstructRow(const VP8Decoder* const dec, | |
709 const VP8ThreadContext* ctx) { | |
710 int j; | |
711 int mb_x; | |
712 const int mb_y = ctx->mb_y_; | |
713 const int cache_id = ctx->id_; | |
714 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; | |
715 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; | |
716 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; | |
717 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { | |
718 const VP8MBData* const block = ctx->mb_data_ + mb_x; | |
719 | |
720 // Rotate in the left samples from previously decoded block. We move four | |
721 // pixels at a time for alignment reasons, and because of the in-loop filter. | |
722 if (mb_x > 0) { | |
723 for (j = -1; j < 16; ++j) { | |
724 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); | |
725 } | |
726 for (j = -1; j < 8; ++j) { | |
727 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); | |
728 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); | |
729 } | |
730 } else { | |
731 for (j = 0; j < 16; ++j) { | |
732 y_dst[j * BPS - 1] = 129; | |
733 } | |
734 for (j = 0; j < 8; ++j) { | |
735 u_dst[j * BPS - 1] = 129; | |
736 v_dst[j * BPS - 1] = 129; | |
737 } | |
738 // Init top-left sample on left column too | |
739 if (mb_y > 0) { | |
740 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; | |
741 } | |
742 } | |
743 { | |
744 // bring top samples into the cache | |
745 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; | |
746 const int16_t* const coeffs = block->coeffs_; | |
747 uint32_t bits = block->non_zero_y_; | |
748 int n; | |
749 | |
750 if (mb_y > 0) { | |
751 memcpy(y_dst - BPS, top_yuv[0].y, 16); | |
752 memcpy(u_dst - BPS, top_yuv[0].u, 8); | |
753 memcpy(v_dst - BPS, top_yuv[0].v, 8); | |
754 } else if (mb_x == 0) { | |
755 // we only need to do this init once at block (0,0). | |
756 // Afterward, it remains valid for the whole topmost row. | |
757 memset(y_dst - BPS - 1, 127, 16 + 4 + 1); | |
758 memset(u_dst - BPS - 1, 127, 8 + 1); | |
759 memset(v_dst - BPS - 1, 127, 8 + 1); | |
760 } | |
761 | |
762 // predict and add residuals | |
763 if (block->is_i4x4_) { // 4x4 | |
764 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); | |
765 | |
766 if (mb_y > 0) { | |
767 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border | |
768 memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); | |
769 } else { | |
770 memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); | |
771 } | |
772 } | |
773 // replicate the top-right pixels below | |
774 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; | |
775 | |
776 // predict and add residuals for all 4x4 blocks in turn. | |
777 for (n = 0; n < 16; ++n, bits <<= 2) { | |
778 uint8_t* const dst = y_dst + kScan[n]; | |
779 VP8PredLuma4[block->imodes_[n]](dst); | |
780 DoTransform(bits, coeffs + n * 16, dst); | |
781 } | |
782 } else { // 16x16 | |
783 const int pred_func = CheckMode(mb_x, mb_y, | |
784 block->imodes_[0]); | |
785 VP8PredLuma16[pred_func](y_dst); | |
786 if (bits != 0) { | |
787 for (n = 0; n < 16; ++n, bits <<= 2) { | |
788 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); | |
789 } | |
790 } | |
791 } | |
792 { | |
793 // Chroma | |
794 const uint32_t bits_uv = block->non_zero_uv_; | |
795 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); | |
796 VP8PredChroma8[pred_func](u_dst); | |
797 VP8PredChroma8[pred_func](v_dst); | |
798 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); | |
799 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); | |
800 } | |
801 | |
802 // stash away top samples for next block | |
803 if (mb_y < dec->mb_h_ - 1) { | |
804 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); | |
805 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); | |
806 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); | |
807 } | |
808 } | |
809 // Transfer reconstructed samples from yuv_b_ cache to final destination. | |
810 { | |
811 const int y_offset = cache_id * 16 * dec->cache_y_stride_; | |
812 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; | |
813 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; | |
814 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; | |
815 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; | |
816 for (j = 0; j < 16; ++j) { | |
817 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); | |
818 } | |
819 for (j = 0; j < 8; ++j) { | |
820 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); | |
821 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); | |
822 } | |
823 } | |
824 } | |
825 } | |
826 | |
827 //------------------------------------------------------------------------------ | |
828 | |
OLD | NEW |