Chromium Code Reviews

Side by Side Diff: third_party/libwebp/dec/frame.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed)
Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase around clang-cl fix Created 4 years, 12 months ago
1 // Copyright 2010 Google Inc. All Rights Reserved. 1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // Frame-reconstruction function. Memory allocation. 10 // Frame-reconstruction function. Memory allocation.
11 // 11 //
12 // Author: Skal (pascal.massimino@gmail.com) 12 // Author: Skal (pascal.massimino@gmail.com)
13 13
14 #include <stdlib.h> 14 #include <stdlib.h>
15 #include "./vp8i.h" 15 #include "./vp8i.h"
16 #include "../utils/utils.h" 16 #include "../utils/utils.h"
17 17
18 #define ALIGN_MASK (32 - 1) 18 //------------------------------------------------------------------------------
19 // Main reconstruction function.
20
21 static const int kScan[16] = {
22 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
23 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
24 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
25 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
26 };
27
28 static int CheckMode(int mb_x, int mb_y, int mode) {
29 if (mode == B_DC_PRED) {
30 if (mb_x == 0) {
31 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
32 } else {
33 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
34 }
35 }
36 return mode;
37 }
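
Note: CheckMode() only remaps the DC prediction mode; at the picture borders the top and/or left neighbor samples do not exist, so a border-aware DC variant is selected instead. A small illustration (not part of the patch; it relies on the B_DC_PRED* and B_TM_PRED enums from vp8i.h and would only compile inside this translation unit):

#include <assert.h>

static void CheckModeExamples(void) {
  assert(CheckMode(0, 0, B_DC_PRED) == B_DC_PRED_NOTOPLEFT);  /* block (0,0): no top, no left */
  assert(CheckMode(5, 0, B_DC_PRED) == B_DC_PRED_NOTOP);      /* topmost row: no top samples  */
  assert(CheckMode(0, 5, B_DC_PRED) == B_DC_PRED_NOLEFT);     /* leftmost column: no left     */
  assert(CheckMode(5, 5, B_TM_PRED) == B_TM_PRED);            /* non-DC modes pass through    */
}
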
38
39 static void Copy32b(uint8_t* const dst, const uint8_t* const src) {
40 memcpy(dst, src, 4);
41 }
42
43 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
44 uint8_t* const dst) {
45 switch (bits >> 30) {
46 case 3:
47 VP8Transform(src, dst, 0);
48 break;
49 case 2:
50 VP8TransformAC3(src, dst);
51 break;
52 case 1:
53 VP8TransformDC(src, dst);
54 break;
55 default:
56 break;
57 }
58 }
59
60 static void DoUVTransform(uint32_t bits, const int16_t* const src,
61 uint8_t* const dst) {
62 if (bits & 0xff) { // any non-zero coeff at all?
63 if (bits & 0xaa) { // any non-zero AC coefficient?
64 VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V
65 } else {
66 VP8TransformDCUV(src, dst);
67 }
68 }
69 }
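
Note: non_zero_y_ is a 32-bit map with one 2-bit code per 4x4 luma sub-block. The reconstruction loop below keeps the current block's code in the top two bits (bits >> 30) and shifts left by two after each block, which is exactly what DoTransform() dispatches on. A standalone sketch of that walk with a hypothetical packed value (illustration only, not libwebp code):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Names follow the switch in DoTransform(): 0 = all-zero block, 1 = DC-only,
   * 2 = AC3 variant, 3 = full transform. */
  static const char* const kName[4] = {
    "skip", "VP8TransformDC", "VP8TransformAC3", "VP8Transform"
  };
  uint32_t bits = 0xD2400000u;  /* hypothetical non_zero_y_ value */
  int n;
  for (n = 0; n < 16; ++n, bits <<= 2) {
    printf("4x4 block %2d -> %s\n", n, kName[bits >> 30]);
  }
  return 0;
}
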
19 70
20 static void ReconstructRow(const VP8Decoder* const dec, 71 static void ReconstructRow(const VP8Decoder* const dec,
21 const VP8ThreadContext* ctx); // TODO(skal): remove 72 const VP8ThreadContext* ctx) {
73 int j;
74 int mb_x;
75 const int mb_y = ctx->mb_y_;
76 const int cache_id = ctx->id_;
77 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
78 uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
79 uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
80
81 // Initialize left-most block.
82 for (j = 0; j < 16; ++j) {
83 y_dst[j * BPS - 1] = 129;
84 }
85 for (j = 0; j < 8; ++j) {
86 u_dst[j * BPS - 1] = 129;
87 v_dst[j * BPS - 1] = 129;
88 }
89
90 // Init top-left sample on left column too.
91 if (mb_y > 0) {
92 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
93 } else {
94 // we only need to do this init once at block (0,0).
95 // Afterward, it remains valid for the whole topmost row.
96 memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
97 memset(u_dst - BPS - 1, 127, 8 + 1);
98 memset(v_dst - BPS - 1, 127, 8 + 1);
99 }
100
101 // Reconstruct one row.
102 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
103 const VP8MBData* const block = ctx->mb_data_ + mb_x;
104
105 // Rotate in the left samples from the previously decoded block. We move
106 // four pixels at a time for alignment reasons, and because of the in-loop filter.
107 if (mb_x > 0) {
108 for (j = -1; j < 16; ++j) {
109 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
110 }
111 for (j = -1; j < 8; ++j) {
112 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
113 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
114 }
115 }
116 {
117 // bring top samples into the cache
118 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
119 const int16_t* const coeffs = block->coeffs_;
120 uint32_t bits = block->non_zero_y_;
121 int n;
122
123 if (mb_y > 0) {
124 memcpy(y_dst - BPS, top_yuv[0].y, 16);
125 memcpy(u_dst - BPS, top_yuv[0].u, 8);
126 memcpy(v_dst - BPS, top_yuv[0].v, 8);
127 }
128
129 // predict and add residuals
130 if (block->is_i4x4_) { // 4x4
131 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
132
133 if (mb_y > 0) {
134 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border
135 memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
136 } else {
137 memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
138 }
139 }
140 // replicate the top-right pixels below
141 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
142
143 // predict and add residuals for all 4x4 blocks in turn.
144 for (n = 0; n < 16; ++n, bits <<= 2) {
145 uint8_t* const dst = y_dst + kScan[n];
146 VP8PredLuma4[block->imodes_[n]](dst);
147 DoTransform(bits, coeffs + n * 16, dst);
148 }
149 } else { // 16x16
150 const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]);
151 VP8PredLuma16[pred_func](y_dst);
152 if (bits != 0) {
153 for (n = 0; n < 16; ++n, bits <<= 2) {
154 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
155 }
156 }
157 }
158 {
159 // Chroma
160 const uint32_t bits_uv = block->non_zero_uv_;
161 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
162 VP8PredChroma8[pred_func](u_dst);
163 VP8PredChroma8[pred_func](v_dst);
164 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
165 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
166 }
167
168 // stash away top samples for next block
169 if (mb_y < dec->mb_h_ - 1) {
170 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
171 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
172 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
173 }
174 }
175 // Transfer reconstructed samples from yuv_b_ cache to final destination.
176 {
177 const int y_offset = cache_id * 16 * dec->cache_y_stride_;
178 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
179 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
180 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
181 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
182 for (j = 0; j < 16; ++j) {
183 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
184 }
185 for (j = 0; j < 8; ++j) {
186 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
187 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
188 }
189 }
190 }
191 }
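
For context, ReconstructRow() is one stage of the per-macroblock-row pipeline; the real driver lives elsewhere in dec/ and is not part of this diff. A hypothetical sketch of where the call sits (all names other than ReconstructRow and the decoder fields visible in this file are made up):

static int ProcessRowSketch(VP8Decoder* const dec, VP8Io* const io) {
  const VP8ThreadContext* const ctx = &dec->thread_ctx_;  /* single-threaded case */
  ReconstructRow(dec, ctx);  /* intra prediction + residuals into the row caches */
  /* In-loop filtering and handing finished rows to 'io' would follow here,
   * holding back kFilterExtraRows[dec->filter_type_] luma rows at the
   * macroblock boundary (see the Filtering section below). */
  (void)io;
  return 1;
}
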
22 192
23 //------------------------------------------------------------------------------ 193 //------------------------------------------------------------------------------
24 // Filtering 194 // Filtering
25 195
26 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary 196 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary
27 // for caching, given a filtering level. 197 // for caching, given a filtering level.
28 // Simple filter: up to 2 luma samples are read and 1 is written. 198 // Simple filter: up to 2 luma samples are read and 1 is written.
29 // Complex filter: up to 4 luma samples are read and 3 are written. Same for 199 // Complex filter: up to 4 luma samples are read and 3 are written. Same for
30 // U/V, so it's 8 samples total (because of the 2x upsampling). 200 // U/V, so it's 8 samples total (because of the 2x upsampling).
31 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; 201 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
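
These extra-row counts feed the cache_height computation in AllocateMemory() further down: cache_height = (16 * num_caches + kFilterExtraRows[filter_type]) * 3 / 2, where the 3/2 factor stacks the two half-height chroma planes under the 16 luma rows of one macroblock row. A throwaway program to check the resulting heights (illustration only, not libwebp code):

#include <stdio.h>

int main(void) {
  static const unsigned char kFilterExtraRows[3] = { 0, 2, 8 };  /* none, simple, complex */
  const int num_caches = 1;  /* assuming a single row cache */
  int filter_type;
  for (filter_type = 0; filter_type < 3; ++filter_type) {
    const int cache_height =
        (16 * num_caches + kFilterExtraRows[filter_type]) * 3 / 2;
    printf("filter_type=%d -> cache_height=%d rows\n", filter_type, cache_height);
  }
  return 0;  /* prints 24, 27 and 36 rows respectively */
}
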
(...skipping 73 matching lines...)
105 if (!dec->segment_hdr_.absolute_delta_) { 275 if (!dec->segment_hdr_.absolute_delta_) {
106 base_level += hdr->level_; 276 base_level += hdr->level_;
107 } 277 }
108 } else { 278 } else {
109 base_level = hdr->level_; 279 base_level = hdr->level_;
110 } 280 }
111 for (i4x4 = 0; i4x4 <= 1; ++i4x4) { 281 for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
112 VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; 282 VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
113 int level = base_level; 283 int level = base_level;
114 if (hdr->use_lf_delta_) { 284 if (hdr->use_lf_delta_) {
115 // TODO(skal): only CURRENT is handled for now.
116 level += hdr->ref_lf_delta_[0]; 285 level += hdr->ref_lf_delta_[0];
117 if (i4x4) { 286 if (i4x4) {
118 level += hdr->mode_lf_delta_[0]; 287 level += hdr->mode_lf_delta_[0];
119 } 288 }
120 } 289 }
121 level = (level < 0) ? 0 : (level > 63) ? 63 : level; 290 level = (level < 0) ? 0 : (level > 63) ? 63 : level;
122 if (level > 0) { 291 if (level > 0) {
123 int ilevel = level; 292 int ilevel = level;
124 if (hdr->sharpness_ > 0) { 293 if (hdr->sharpness_ > 0) {
125 if (hdr->sharpness_ > 4) { 294 if (hdr->sharpness_ > 4) {
(...skipping 44 matching lines...)
170 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; 339 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
171 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; 340 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
172 } 341 }
173 all_amp |= dqm->dither_; 342 all_amp |= dqm->dither_;
174 } 343 }
175 if (all_amp != 0) { 344 if (all_amp != 0) {
176 VP8InitRandom(&dec->dithering_rg_, 1.0f); 345 VP8InitRandom(&dec->dithering_rg_, 1.0f);
177 dec->dither_ = 1; 346 dec->dither_ = 1;
178 } 347 }
179 } 348 }
180 #if WEBP_DECODER_ABI_VERSION > 0x0204
181 // potentially allow alpha dithering 349 // potentially allow alpha dithering
182 dec->alpha_dithering_ = options->alpha_dithering_strength; 350 dec->alpha_dithering_ = options->alpha_dithering_strength;
183 if (dec->alpha_dithering_ > 100) { 351 if (dec->alpha_dithering_ > 100) {
184 dec->alpha_dithering_ = 100; 352 dec->alpha_dithering_ = 100;
185 } else if (dec->alpha_dithering_ < 0) { 353 } else if (dec->alpha_dithering_ < 0) {
186 dec->alpha_dithering_ = 0; 354 dec->alpha_dithering_ = 0;
187 } 355 }
188 #endif
189 } 356 }
190 } 357 }
191 358
192 // minimal amp that will provide a non-zero dithering effect 359 // minimal amp that will provide a non-zero dithering effect
193 #define MIN_DITHER_AMP 4 360 #define MIN_DITHER_AMP 4
194 #define DITHER_DESCALE 4 361 #define DITHER_DESCALE 4
195 #define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) 362 #define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
196 #define DITHER_AMP_BITS 8 363 #define DITHER_AMP_BITS 8
197 #define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) 364 #define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
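
A rough sketch of how these constants are meant to combine: a pseudo-random sample nominally centered on DITHER_AMP_CENTER is turned into a small signed delta by a rounded right-shift of DITHER_DESCALE bits, then added to the pixel and clipped. This is an illustration under that assumption, not the decoder's actual dithering routine (which draws its noise from the dithering_rg_ generator initialized above):

static unsigned char DitherOnePixelSketch(unsigned char pixel, int noise) {
  /* 'noise' is assumed to be a non-negative pseudo-random sample whose
   * midpoint is DITHER_AMP_CENTER; amplitudes below MIN_DITHER_AMP would be
   * skipped entirely by the caller. */
  const int centered = noise - DITHER_AMP_CENTER;
  const int delta = (centered + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
  const int v = (int)pixel + delta;
  return (unsigned char)((v < 0) ? 0 : (v > 255) ? 255 : v);
}
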
198 365
(...skipping 348 matching lines...)
547 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_); 714 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
548 const size_t cache_height = (16 * num_caches 715 const size_t cache_height = (16 * num_caches
549 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; 716 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
550 const size_t cache_size = top_size * cache_height; 717 const size_t cache_size = top_size * cache_height;
551 // alpha_size is the only one that scales as width x height. 718 // alpha_size is the only one that scales as width x height.
552 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ? 719 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
553 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; 720 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
554 const uint64_t needed = (uint64_t)intra_pred_mode_size 721 const uint64_t needed = (uint64_t)intra_pred_mode_size
555 + top_size + mb_info_size + f_info_size 722 + top_size + mb_info_size + f_info_size
556 + yuv_size + mb_data_size 723 + yuv_size + mb_data_size
557 + cache_size + alpha_size + ALIGN_MASK; 724 + cache_size + alpha_size + WEBP_ALIGN_CST;
558 uint8_t* mem; 725 uint8_t* mem;
559 726
560 if (needed != (size_t)needed) return 0; // check for overflow 727 if (needed != (size_t)needed) return 0; // check for overflow
561 if (needed > dec->mem_size_) { 728 if (needed > dec->mem_size_) {
562 WebPSafeFree(dec->mem_); 729 WebPSafeFree(dec->mem_);
563 dec->mem_size_ = 0; 730 dec->mem_size_ = 0;
564 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t)); 731 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
565 if (dec->mem_ == NULL) { 732 if (dec->mem_ == NULL) {
566 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, 733 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
567 "no memory during frame initialization."); 734 "no memory during frame initialization.");
(...skipping 16 matching lines...)
584 mem += f_info_size; 751 mem += f_info_size;
585 dec->thread_ctx_.id_ = 0; 752 dec->thread_ctx_.id_ = 0;
586 dec->thread_ctx_.f_info_ = dec->f_info_; 753 dec->thread_ctx_.f_info_ = dec->f_info_;
587 if (dec->mt_method_ > 0) { 754 if (dec->mt_method_ > 0) {
588 // secondary cache line. The deblocking process needs to make use of the 755 // secondary cache line. The deblocking process needs to make use of the
589 // filtering strength from the previous macroblock row, while the new ones 756 // filtering strength from the previous macroblock row, while the new ones
590 // are being decoded in parallel. We'll just swap the pointers. 757 // are being decoded in parallel. We'll just swap the pointers.
591 dec->thread_ctx_.f_info_ += mb_w; 758 dec->thread_ctx_.f_info_ += mb_w;
592 } 759 }
593 760
594 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); 761 mem = (uint8_t*)WEBP_ALIGN(mem);
595 assert((yuv_size & ALIGN_MASK) == 0); 762 assert((yuv_size & WEBP_ALIGN_CST) == 0);
596 dec->yuv_b_ = (uint8_t*)mem; 763 dec->yuv_b_ = (uint8_t*)mem;
597 mem += yuv_size; 764 mem += yuv_size;
598 765
599 dec->mb_data_ = (VP8MBData*)mem; 766 dec->mb_data_ = (VP8MBData*)mem;
600 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; 767 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
601 if (dec->mt_method_ == 2) { 768 if (dec->mt_method_ == 2) {
602 dec->thread_ctx_.mb_data_ += mb_w; 769 dec->thread_ctx_.mb_data_ += mb_w;
603 } 770 }
604 mem += mb_data_size; 771 mem += mb_data_size;
605 772
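
Note on the alignment change above: the local ALIGN_MASK (32 - 1) is gone and the code now uses WEBP_ALIGN()/WEBP_ALIGN_CST from ../utils/utils.h, which this file already includes. That header is not part of this diff; assuming definitions along the following lines, the new macro keeps the same 32-byte alignment the old mask enforced:

/* Presumed shape of the helpers in utils/utils.h (an assumption, not shown in this patch): */
#define WEBP_ALIGN_CST 31
#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & ~(uintptr_t)WEBP_ALIGN_CST)

The updated assert on new line 762 also treats WEBP_ALIGN_CST as a mask, since yuv_size must be a multiple of the alignment.
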
(...skipping 31 matching lines...)
637 // prepare 'io' 804 // prepare 'io'
638 io->mb_y = 0; 805 io->mb_y = 0;
639 io->y = dec->cache_y_; 806 io->y = dec->cache_y_;
640 io->u = dec->cache_u_; 807 io->u = dec->cache_u_;
641 io->v = dec->cache_v_; 808 io->v = dec->cache_v_;
642 io->y_stride = dec->cache_y_stride_; 809 io->y_stride = dec->cache_y_stride_;
643 io->uv_stride = dec->cache_uv_stride_; 810 io->uv_stride = dec->cache_uv_stride_;
644 io->a = NULL; 811 io->a = NULL;
645 } 812 }
646 813
647 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { 814 int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
648 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. 815 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.
649 if (!AllocateMemory(dec)) return 0; 816 if (!AllocateMemory(dec)) return 0;
650 InitIo(dec, io); 817 InitIo(dec, io);
651 VP8DspInit(); // Init critical function pointers and look-up tables. 818 VP8DspInit(); // Init critical function pointers and look-up tables.
652 return 1; 819 return 1;
653 } 820 }
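
Usage note: VP8InitFrame() is the entry point of this file's allocation path; its real caller lives in dec/vp8.c and is outside this diff. A hypothetical ordering sketch (DecodeFrameSketch is made up):

static int DecodeFrameSketch(VP8Decoder* const dec, VP8Io* const io) {
  /* Frame headers and partitions must already be parsed at this point, so
   * that mb_w_, filter_type_, mt_method_, etc. are known to AllocateMemory(). */
  if (!VP8InitFrame(dec, io)) {
    return 0;  /* error status was recorded via VP8SetError() in the callee */
  }
  /* Per-row parsing, ReconstructRow(), filtering and output follow. */
  return 1;
}
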
654 821
655 //------------------------------------------------------------------------------ 822 //------------------------------------------------------------------------------
656 // Main reconstruction function.
657
658 static const int kScan[16] = {
659 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
660 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
661 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
662 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
663 };
664
665 static int CheckMode(int mb_x, int mb_y, int mode) {
666 if (mode == B_DC_PRED) {
667 if (mb_x == 0) {
668 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
669 } else {
670 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
671 }
672 }
673 return mode;
674 }
675
676 static void Copy32b(uint8_t* dst, uint8_t* src) {
677 memcpy(dst, src, 4);
678 }
679
680 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
681 uint8_t* const dst) {
682 switch (bits >> 30) {
683 case 3:
684 VP8Transform(src, dst, 0);
685 break;
686 case 2:
687 VP8TransformAC3(src, dst);
688 break;
689 case 1:
690 VP8TransformDC(src, dst);
691 break;
692 default:
693 break;
694 }
695 }
696
697 static void DoUVTransform(uint32_t bits, const int16_t* const src,
698 uint8_t* const dst) {
699 if (bits & 0xff) { // any non-zero coeff at all?
700 if (bits & 0xaa) { // any non-zero AC coefficient?
701 VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V
702 } else {
703 VP8TransformDCUV(src, dst);
704 }
705 }
706 }
707
708 static void ReconstructRow(const VP8Decoder* const dec,
709 const VP8ThreadContext* ctx) {
710 int j;
711 int mb_x;
712 const int mb_y = ctx->mb_y_;
713 const int cache_id = ctx->id_;
714 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
715 uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
716 uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
717 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
718 const VP8MBData* const block = ctx->mb_data_ + mb_x;
719
720 // Rotate in the left samples from the previously decoded block. We move
721 // four pixels at a time for alignment reasons, and because of the in-loop filter.
722 if (mb_x > 0) {
723 for (j = -1; j < 16; ++j) {
724 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
725 }
726 for (j = -1; j < 8; ++j) {
727 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
728 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
729 }
730 } else {
731 for (j = 0; j < 16; ++j) {
732 y_dst[j * BPS - 1] = 129;
733 }
734 for (j = 0; j < 8; ++j) {
735 u_dst[j * BPS - 1] = 129;
736 v_dst[j * BPS - 1] = 129;
737 }
738 // Init top-left sample on left column too
739 if (mb_y > 0) {
740 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
741 }
742 }
743 {
744 // bring top samples into the cache
745 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
746 const int16_t* const coeffs = block->coeffs_;
747 uint32_t bits = block->non_zero_y_;
748 int n;
749
750 if (mb_y > 0) {
751 memcpy(y_dst - BPS, top_yuv[0].y, 16);
752 memcpy(u_dst - BPS, top_yuv[0].u, 8);
753 memcpy(v_dst - BPS, top_yuv[0].v, 8);
754 } else if (mb_x == 0) {
755 // we only need to do this init once at block (0,0).
756 // Afterward, it remains valid for the whole topmost row.
757 memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
758 memset(u_dst - BPS - 1, 127, 8 + 1);
759 memset(v_dst - BPS - 1, 127, 8 + 1);
760 }
761
762 // predict and add residuals
763 if (block->is_i4x4_) { // 4x4
764 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
765
766 if (mb_y > 0) {
767 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border
768 memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
769 } else {
770 memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
771 }
772 }
773 // replicate the top-right pixels below
774 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
775
776 // predict and add residuals for all 4x4 blocks in turn.
777 for (n = 0; n < 16; ++n, bits <<= 2) {
778 uint8_t* const dst = y_dst + kScan[n];
779 VP8PredLuma4[block->imodes_[n]](dst);
780 DoTransform(bits, coeffs + n * 16, dst);
781 }
782 } else { // 16x16
783 const int pred_func = CheckMode(mb_x, mb_y,
784 block->imodes_[0]);
785 VP8PredLuma16[pred_func](y_dst);
786 if (bits != 0) {
787 for (n = 0; n < 16; ++n, bits <<= 2) {
788 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
789 }
790 }
791 }
792 {
793 // Chroma
794 const uint32_t bits_uv = block->non_zero_uv_;
795 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
796 VP8PredChroma8[pred_func](u_dst);
797 VP8PredChroma8[pred_func](v_dst);
798 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
799 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
800 }
801
802 // stash away top samples for next block
803 if (mb_y < dec->mb_h_ - 1) {
804 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
805 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
806 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
807 }
808 }
809 // Transfer reconstructed samples from yuv_b_ cache to final destination.
810 {
811 const int y_offset = cache_id * 16 * dec->cache_y_stride_;
812 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
813 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
814 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
815 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
816 for (j = 0; j < 16; ++j) {
817 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
818 }
819 for (j = 0; j < 8; ++j) {
820 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
821 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
822 }
823 }
824 }
825 }
826
827 //------------------------------------------------------------------------------
828