third_party/libwebp/dec/frame.c - Issue 116213006: Update libwebp to 0.4.0

Side by Side Diff: third_party/libwebp/dec/frame.c

Issue 116213006: Update libwebp to 0.4.0 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: After Blink Roll Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2010 Google Inc. All Rights Reserved.	1 // Copyright 2010 Google Inc. All Rights Reserved.

2 //	2 //

3 // Use of this source code is governed by a BSD-style license	3 // Use of this source code is governed by a BSD-style license

4 // that can be found in the COPYING file in the root of the source	4 // that can be found in the COPYING file in the root of the source

5 // tree. An additional intellectual property rights grant can be found	5 // tree. An additional intellectual property rights grant can be found

6 // in the file PATENTS. All contributing project authors may	6 // in the file PATENTS. All contributing project authors may

7 // be found in the AUTHORS file in the root of the source tree.	7 // be found in the AUTHORS file in the root of the source tree.

8 // -----------------------------------------------------------------------------	8 // -----------------------------------------------------------------------------

9 //	9 //

10 // Frame-reconstruction function. Memory allocation.	10 // Frame-reconstruction function. Memory allocation.

11 //	11 //

12 // Author: Skal (pascal.massimino@gmail.com)	12 // Author: Skal (pascal.massimino@gmail.com)

13	13

14 #include <stdlib.h>	14 #include <stdlib.h>

15 #include "./vp8i.h"	15 #include "./vp8i.h"

16 #include "../utils/utils.h"	16 #include "../utils/utils.h"

17	17

18 #if defined(__cplusplus) || defined(c_plusplus)	18 #define ALIGN_MASK (32 - 1)

19 extern "C" {

20 #endif

21	19

22 #define ALIGN_MASK (32 - 1)	20 static void ReconstructRow(const VP8Decoder* const dec,

	21 const VP8ThreadContext* ctx); // TODO(skal): remove

23	22

24 //------------------------------------------------------------------------------	23 //------------------------------------------------------------------------------

25 // Filtering	24 // Filtering

26	25

27 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary	26 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary

28 // for caching, given a filtering level.	27 // for caching, given a filtering level.

29 // Simple filter: up to 2 luma samples are read and 1 is written.	28 // Simple filter: up to 2 luma samples are read and 1 is written.

30 // Complex filter: up to 4 luma samples are read and 3 are written. Same for	29 // Complex filter: up to 4 luma samples are read and 3 are written. Same for

31 // U/V, so it's 8 samples total (because of the 2x upsampling).	30 // U/V, so it's 8 samples total (because of the 2x upsampling).

32 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };	31 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };

33	32

34 static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) {

35 if (keyframe) {

36 return (level >= 40) ? 2 : (level >= 15) ? 1 : 0;

37 } else {

38 return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0;

39 }

40 }

41

42 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {	33 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {

43 const VP8ThreadContext* const ctx = &dec->thread_ctx_;	34 const VP8ThreadContext* const ctx = &dec->thread_ctx_;

	35 const int cache_id = ctx->id_;

44 const int y_bps = dec->cache_y_stride_;	36 const int y_bps = dec->cache_y_stride_;

45 VP8FInfo* const f_info = ctx->f_info_ + mb_x;	37 const VP8FInfo* const f_info = ctx->f_info_ + mb_x;

46 uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16;	38 uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;

47 const int level = f_info->f_level_;

48 const int ilevel = f_info->f_ilevel_;	39 const int ilevel = f_info->f_ilevel_;

49 const int limit = 2 * level + ilevel;	40 const int limit = f_info->f_limit_;

50 if (level == 0) {	41 if (limit == 0) {

51 return;	42 return;

52 }	43 }

	44 assert(limit >= 3);

53 if (dec->filter_type_ == 1) { // simple	45 if (dec->filter_type_ == 1) { // simple

54 if (mb_x > 0) {	46 if (mb_x > 0) {

55 VP8SimpleHFilter16(y_dst, y_bps, limit + 4);	47 VP8SimpleHFilter16(y_dst, y_bps, limit + 4);

56 }	48 }

57 if (f_info->f_inner_) {	49 if (f_info->f_inner_) {

58 VP8SimpleHFilter16i(y_dst, y_bps, limit);	50 VP8SimpleHFilter16i(y_dst, y_bps, limit);

59 }	51 }

60 if (mb_y > 0) {	52 if (mb_y > 0) {

61 VP8SimpleVFilter16(y_dst, y_bps, limit + 4);	53 VP8SimpleVFilter16(y_dst, y_bps, limit + 4);

62 }	54 }

63 if (f_info->f_inner_) {	55 if (f_info->f_inner_) {

64 VP8SimpleVFilter16i(y_dst, y_bps, limit);	56 VP8SimpleVFilter16i(y_dst, y_bps, limit);

65 }	57 }

66 } else { // complex	58 } else { // complex

67 const int uv_bps = dec->cache_uv_stride_;	59 const int uv_bps = dec->cache_uv_stride_;

68 uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;	60 uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;

69 uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;	61 uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;

70 const int hev_thresh =	62 const int hev_thresh = f_info->hev_thresh_;

71 hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);

72 if (mb_x > 0) {	63 if (mb_x > 0) {

73 VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);	64 VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);

74 VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);	65 VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);

75 }	66 }

76 if (f_info->f_inner_) {	67 if (f_info->f_inner_) {

77 VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);	68 VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);

78 VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);	69 VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);

79 }	70 }

80 if (mb_y > 0) {	71 if (mb_y > 0) {

81 VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);	72 VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
121 VP8FInfo* const info = &dec->fstrengths_[s][i4x4];	112 VP8FInfo* const info = &dec->fstrengths_[s][i4x4];

122 int level = base_level;	113 int level = base_level;

123 if (hdr->use_lf_delta_) {	114 if (hdr->use_lf_delta_) {

124 // TODO(skal): only CURRENT is handled for now.	115 // TODO(skal): only CURRENT is handled for now.

125 level += hdr->ref_lf_delta_[0];	116 level += hdr->ref_lf_delta_[0];

126 if (i4x4) {	117 if (i4x4) {

127 level += hdr->mode_lf_delta_[0];	118 level += hdr->mode_lf_delta_[0];

128 }	119 }

129 }	120 }

130 level = (level < 0) ? 0 : (level > 63) ? 63 : level;	121 level = (level < 0) ? 0 : (level > 63) ? 63 : level;

131 info->f_level_ = level;	122 if (level > 0) {

	123 int ilevel = level;

	124 if (hdr->sharpness_ > 0) {

	125 if (hdr->sharpness_ > 4) {

	126 ilevel >>= 2;

	127 } else {

	128 ilevel >>= 1;

	129 }

	130 if (ilevel > 9 - hdr->sharpness_) {

	131 ilevel = 9 - hdr->sharpness_;

	132 }

	133 }

	134 if (ilevel < 1) ilevel = 1;

	135 info->f_ilevel_ = ilevel;

	136 info->f_limit_ = 2 * level + ilevel;

	137 info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;

	138 } else {

	139 info->f_limit_ = 0; // no filtering

	140 }

	141 info->f_inner_ = i4x4;

	142 }

	143 }

	144 }

	145 }

132	146

133 if (hdr->sharpness_ > 0) {	147 //------------------------------------------------------------------------------

134 if (hdr->sharpness_ > 4) {	148 // Dithering

135 level >>= 2;	149

136 } else {	150 #define DITHER_AMP_TAB_SIZE 12

137 level >>= 1;	151 static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {

138 }	152 // roughly, it's dqm->uv_mat_[1]

139 if (level > 9 - hdr->sharpness_) {	153 8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1

140 level = 9 - hdr->sharpness_;	154 };

141 }	155

	156 void VP8InitDithering(const WebPDecoderOptions* const options,

	157 VP8Decoder* const dec) {

	158 assert(dec != NULL);

	159 if (options != NULL) {

	160 const int d = options->dithering_strength;

	161 const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;

	162 const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);

	163 if (f > 0) {

	164 int s;

	165 int all_amp = 0;

	166 for (s = 0; s < NUM_MB_SEGMENTS; ++s) {

	167 VP8QuantMatrix* const dqm = &dec->dqm_[s];

	168 if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {

	169 // TODO(skal): should we specially dither more for uv_quant_ < 0?

	170 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;

	171 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;

142 }	172 }

143 info->f_ilevel_ = (level < 1) ? 1 : level;	173 all_amp |= dqm->dither_;

144 info->f_inner_ = 0;

145 }	174 }

	175 if (all_amp != 0) {

	176 VP8InitRandom(&dec->dithering_rg_, 1.0f);

	177 dec->dither_ = 1;

	178 }

	179 }

	180 }

	181 }

	182

	183 // minimal amp that will provide a non-zero dithering effect

	184 #define MIN_DITHER_AMP 4

	185 #define DITHER_DESCALE 4

	186 #define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))

	187 #define DITHER_AMP_BITS 8

	188 #define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)

	189

	190 static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {

	191 int i, j;

	192 for (j = 0; j < 8; ++j) {

	193 for (i = 0; i < 8; ++i) {

	194 // TODO: could be made faster with SSE2

	195 const int bits =

	196 VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;

	197 // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100

	198 const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;

	199 const int v = (int)dst[i] + delta;

	200 dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;

	201 }

	202 dst += bps;

	203 }

	204 }

	205

	206 static void DitherRow(VP8Decoder* const dec) {

	207 int mb_x;

	208 assert(dec->dither_);

	209 for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {

	210 const VP8ThreadContext* const ctx = &dec->thread_ctx_;

	211 const VP8MBData* const data = ctx->mb_data_ + mb_x;

	212 const int cache_id = ctx->id_;

	213 const int uv_bps = dec->cache_uv_stride_;

	214 if (data->dither_ >= MIN_DITHER_AMP) {

	215 uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;

	216 uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;

	217 Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);

	218 Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);

146 }	219 }

147 }	220 }

148 }	221 }

149	222

150 //------------------------------------------------------------------------------	223 //------------------------------------------------------------------------------

151 // This function is called after a row of macroblocks is finished decoding.	224 // This function is called after a row of macroblocks is finished decoding.

152 // It also takes into account the following restrictions:	225 // It also takes into account the following restrictions:

153 // * In case of in-loop filtering, we must hold off sending some of the bottom	226 // * In case of in-loop filtering, we must hold off sending some of the bottom

154 // pixels as they are yet unfiltered. They will be when the next macroblock	227 // pixels as they are yet unfiltered. They will be when the next macroblock

155 // row is decoded. Meanwhile, we must preserve them by rotating them in the	228 // row is decoded. Meanwhile, we must preserve them by rotating them in the

156 // cache area. This doesn't hold for the very bottom row of the uncropped	229 // cache area. This doesn't hold for the very bottom row of the uncropped

157 // picture of course.	230 // picture of course.

158 // * we must clip the remaining pixels against the cropping area. The VP8Io	231 // * we must clip the remaining pixels against the cropping area. The VP8Io

159 // struct must have the following fields set correctly before calling put():	232 // struct must have the following fields set correctly before calling put():

160	233

161 #define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB	234 #define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB

162	235

163 // Finalize and transmit a complete row. Return false in case of user-abort.	236 // Finalize and transmit a complete row. Return false in case of user-abort.

164 static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {	237 static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {

165 int ok = 1;	238 int ok = 1;

166 const VP8ThreadContext* const ctx = &dec->thread_ctx_;	239 const VP8ThreadContext* const ctx = &dec->thread_ctx_;

	240 const int cache_id = ctx->id_;

167 const int extra_y_rows = kFilterExtraRows[dec->filter_type_];	241 const int extra_y_rows = kFilterExtraRows[dec->filter_type_];

168 const int ysize = extra_y_rows * dec->cache_y_stride_;	242 const int ysize = extra_y_rows * dec->cache_y_stride_;

169 const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;	243 const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;

170 const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_;	244 const int y_offset = cache_id * 16 * dec->cache_y_stride_;

171 const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_;	245 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;

172 uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;	246 uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;

173 uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;	247 uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;

174 uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;	248 uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;

175 const int first_row = (ctx->mb_y_ == 0);	249 const int mb_y = ctx->mb_y_;

176 const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1);	250 const int is_first_row = (mb_y == 0);

177 int y_start = MACROBLOCK_VPOS(ctx->mb_y_);	251 const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);

178 int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1);	252

	253 if (dec->mt_method_ == 2) {

	254 ReconstructRow(dec, ctx);

	255 }

179	256

180 if (ctx->filter_row_) {	257 if (ctx->filter_row_) {

181 FilterRow(dec);	258 FilterRow(dec);

182 }	259 }

183	260

184 if (io->put) {	261 if (dec->dither_) {

185 if (!first_row) {	262 DitherRow(dec);

	263 }

	264

	265 if (io->put != NULL) {

	266 int y_start = MACROBLOCK_VPOS(mb_y);

	267 int y_end = MACROBLOCK_VPOS(mb_y + 1);

	268 if (!is_first_row) {

186 y_start -= extra_y_rows;	269 y_start -= extra_y_rows;

187 io->y = ydst;	270 io->y = ydst;

188 io->u = udst;	271 io->u = udst;

189 io->v = vdst;	272 io->v = vdst;

190 } else {	273 } else {

191 io->y = dec->cache_y_ + y_offset;	274 io->y = dec->cache_y_ + y_offset;

192 io->u = dec->cache_u_ + uv_offset;	275 io->u = dec->cache_u_ + uv_offset;

193 io->v = dec->cache_v_ + uv_offset;	276 io->v = dec->cache_v_ + uv_offset;

194 }	277 }

195	278

196 if (!last_row) {	279 if (!is_last_row) {

197 y_end -= extra_y_rows;	280 y_end -= extra_y_rows;

198 }	281 }

199 if (y_end > io->crop_bottom) {	282 if (y_end > io->crop_bottom) {

200 y_end = io->crop_bottom; // make sure we don't overflow on last row.	283 y_end = io->crop_bottom; // make sure we don't overflow on last row.

201 }	284 }

202 io->a = NULL;	285 io->a = NULL;

203 if (dec->alpha_data_ != NULL && y_start < y_end) {	286 if (dec->alpha_data_ != NULL && y_start < y_end) {

204 // TODO(skal): several things to correct here:	287 // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a

205 // * testing presence of alpha with dec->alpha_data_ is not a good idea	288 // good idea.

206 // * we're actually decompressing the full plane only once. It should be

207 // more obvious from signature.

208 // * we could free alpha_data_ right after this call, but we don't own.

209 io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);	289 io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);

210 if (io->a == NULL) {	290 if (io->a == NULL) {

211 return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,	291 return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,

212 "Could not decode alpha data.");	292 "Could not decode alpha data.");

213 }	293 }

214 }	294 }

215 if (y_start < io->crop_top) {	295 if (y_start < io->crop_top) {

216 const int delta_y = io->crop_top - y_start;	296 const int delta_y = io->crop_top - y_start;

217 y_start = io->crop_top;	297 y_start = io->crop_top;

218 assert(!(delta_y & 1));	298 assert(!(delta_y & 1));

(...skipping 11 matching lines...) Expand all Loading...
230 if (io->a != NULL) {	310 if (io->a != NULL) {

231 io->a += io->crop_left;	311 io->a += io->crop_left;

232 }	312 }

233 io->mb_y = y_start - io->crop_top;	313 io->mb_y = y_start - io->crop_top;

234 io->mb_w = io->crop_right - io->crop_left;	314 io->mb_w = io->crop_right - io->crop_left;

235 io->mb_h = y_end - y_start;	315 io->mb_h = y_end - y_start;

236 ok = io->put(io);	316 ok = io->put(io);

237 }	317 }

238 }	318 }

239 // rotate top samples if needed	319 // rotate top samples if needed

240 if (ctx->id_ + 1 == dec->num_caches_) {	320 if (cache_id + 1 == dec->num_caches_) {

241 if (!last_row) {	321 if (!is_last_row) {

242 memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);	322 memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);

243 memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);	323 memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);

244 memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);	324 memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);

245 }	325 }

246 }	326 }

247	327

248 return ok;	328 return ok;

249 }	329 }

250	330

251 #undef MACROBLOCK_VPOS	331 #undef MACROBLOCK_VPOS

252	332

253 //------------------------------------------------------------------------------	333 //------------------------------------------------------------------------------

254	334

255 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {	335 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {

256 int ok = 1;	336 int ok = 1;

257 VP8ThreadContext* const ctx = &dec->thread_ctx_;	337 VP8ThreadContext* const ctx = &dec->thread_ctx_;

258 if (!dec->use_threads_) {	338 const int filter_row =

	339 (dec->filter_type_ > 0) &&

	340 (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);

	341 if (dec->mt_method_ == 0) {

259 // ctx->id_ and ctx->f_info_ are already set	342 // ctx->id_ and ctx->f_info_ are already set

260 ctx->mb_y_ = dec->mb_y_;	343 ctx->mb_y_ = dec->mb_y_;

261 ctx->filter_row_ = dec->filter_row_;	344 ctx->filter_row_ = filter_row;

	345 ReconstructRow(dec, ctx);

262 ok = FinishRow(dec, io);	346 ok = FinishRow(dec, io);

263 } else {	347 } else {

264 WebPWorker* const worker = &dec->worker_;	348 WebPWorker* const worker = &dec->worker_;

265 // Finish previous job before updating context	349 // Finish previous job before updating context

266 ok &= WebPWorkerSync(worker);	350 ok &= WebPWorkerSync(worker);

267 assert(worker->status_ == OK);	351 assert(worker->status_ == OK);

268 if (ok) { // spawn a new deblocking/output job	352 if (ok) { // spawn a new deblocking/output job

269 ctx->io_ = *io;	353 ctx->io_ = *io;

270 ctx->id_ = dec->cache_id_;	354 ctx->id_ = dec->cache_id_;

271 ctx->mb_y_ = dec->mb_y_;	355 ctx->mb_y_ = dec->mb_y_;

272 ctx->filter_row_ = dec->filter_row_;	356 ctx->filter_row_ = filter_row;

273 if (ctx->filter_row_) { // just swap filter info	357 if (dec->mt_method_ == 2) { // swap macroblock data

	358 VP8MBData* const tmp = ctx->mb_data_;

	359 ctx->mb_data_ = dec->mb_data_;

	360 dec->mb_data_ = tmp;

	361 } else {

	362 // perform reconstruction directly in main thread

	363 ReconstructRow(dec, ctx);

	364 }

	365 if (filter_row) { // swap filter info

274 VP8FInfo* const tmp = ctx->f_info_;	366 VP8FInfo* const tmp = ctx->f_info_;

275 ctx->f_info_ = dec->f_info_;	367 ctx->f_info_ = dec->f_info_;

276 dec->f_info_ = tmp;	368 dec->f_info_ = tmp;

277 }	369 }

278 WebPWorkerLaunch(worker);	370 WebPWorkerLaunch(worker); // (reconstruct)+filter in parallel

279 if (++dec->cache_id_ == dec->num_caches_) {	371 if (++dec->cache_id_ == dec->num_caches_) {

280 dec->cache_id_ = 0;	372 dec->cache_id_ = 0;

281 }	373 }

282 }	374 }

283 }	375 }

284 return ok;	376 return ok;

285 }	377 }

286	378

287 //------------------------------------------------------------------------------	379 //------------------------------------------------------------------------------

288 // Finish setting up the decoding parameter once user's setup() is called.	380 // Finish setting up the decoding parameter once user's setup() is called.

289	381

290 VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {	382 VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {

291 // Call setup() first. This may trigger additional decoding features on 'io'.	383 // Call setup() first. This may trigger additional decoding features on 'io'.

292 // Note: Afterward, we must call teardown() not matter what.	384 // Note: Afterward, we must call teardown() no matter what.

293 if (io->setup && !io->setup(io)) {	385 if (io->setup != NULL && !io->setup(io)) {

294 VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");	386 VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");

295 return dec->status_;	387 return dec->status_;

296 }	388 }

297	389

298 // Disable filtering per user request	390 // Disable filtering per user request

299 if (io->bypass_filtering) {	391 if (io->bypass_filtering) {

300 dec->filter_type_ = 0;	392 dec->filter_type_ = 0;

301 }	393 }

302 // TODO(skal): filter type / strength / sharpness forcing	394 // TODO(skal): filter type / strength / sharpness forcing

303	395

304 // Define the area where we can skip in-loop filtering, in case of cropping.	396 // Define the area where we can skip in-loop filtering, in case of cropping.

305 //	397 //

306 // 'Simple' filter reads two luma samples outside of the macroblock and	398 // 'Simple' filter reads two luma samples outside of the macroblock

307 // and filters one. It doesn't filter the chroma samples. Hence, we can	399 // and filters one. It doesn't filter the chroma samples. Hence, we can

308 // avoid doing the in-loop filtering before crop_top/crop_left position.	400 // avoid doing the in-loop filtering before crop_top/crop_left position.

309 // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.	401 // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.

310 // Means: there's a dependency chain that goes all the way up to the	402 // Means: there's a dependency chain that goes all the way up to the

311 // top-left corner of the picture (MB #0). We must filter all the previous	403 // top-left corner of the picture (MB #0). We must filter all the previous

312 // macroblocks.	404 // macroblocks.

313 // TODO(skal): add an 'approximate_decoding' option, that won't produce	405 // TODO(skal): add an 'approximate_decoding' option, that won't produce

314 // a 1:1 bit-exactness for complex filtering?	406 // a 1:1 bit-exactness for complex filtering?

315 {	407 {

316 const int extra_pixels = kFilterExtraRows[dec->filter_type_];	408 const int extra_pixels = kFilterExtraRows[dec->filter_type_];

(...skipping 20 matching lines...) Expand all Loading...
337 if (dec->br_mb_y_ > dec->mb_h_) {	429 if (dec->br_mb_y_ > dec->mb_h_) {

338 dec->br_mb_y_ = dec->mb_h_;	430 dec->br_mb_y_ = dec->mb_h_;

339 }	431 }

340 }	432 }

341 PrecomputeFilterStrengths(dec);	433 PrecomputeFilterStrengths(dec);

342 return VP8_STATUS_OK;	434 return VP8_STATUS_OK;

343 }	435 }

344	436

345 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {	437 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {

346 int ok = 1;	438 int ok = 1;

347 if (dec->use_threads_) {	439 if (dec->mt_method_ > 0) {

348 ok = WebPWorkerSync(&dec->worker_);	440 ok = WebPWorkerSync(&dec->worker_);

349 }	441 }

350	442

351 if (io->teardown) {	443 if (io->teardown != NULL) {

352 io->teardown(io);	444 io->teardown(io);

353 }	445 }

354 return ok;	446 return ok;

355 }	447 }

356	448

357 //------------------------------------------------------------------------------	449 //------------------------------------------------------------------------------

358 // For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.	450 // For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.

359 //	451 //

360 // Reason is: the deblocking filter cannot deblock the bottom horizontal edges	452 // Reason is: the deblocking filter cannot deblock the bottom horizontal edges

361 // immediately, and needs to wait for first few rows of the next macroblock to	453 // immediately, and needs to wait for first few rows of the next macroblock to

(...skipping 15 matching lines...) Expand all Loading...
377 // and output process have non-concurrent writing:	469 // and output process have non-concurrent writing:

378 // Decode: [ 0..15][16..31][ 0..15][16..31][...	470 // Decode: [ 0..15][16..31][ 0..15][16..31][...

379 // io->put: [ 0..15][16..31][ 0..15][...	471 // io->put: [ 0..15][16..31][ 0..15][...

380	472

381 #define MT_CACHE_LINES 3	473 #define MT_CACHE_LINES 3

382 #define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case	474 #define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case

383	475

384 // Initialize multi/single-thread worker	476 // Initialize multi/single-thread worker

385 static int InitThreadContext(VP8Decoder* const dec) {	477 static int InitThreadContext(VP8Decoder* const dec) {

386 dec->cache_id_ = 0;	478 dec->cache_id_ = 0;

387 if (dec->use_threads_) {	479 if (dec->mt_method_ > 0) {

388 WebPWorker* const worker = &dec->worker_;	480 WebPWorker* const worker = &dec->worker_;

389 if (!WebPWorkerReset(worker)) {	481 if (!WebPWorkerReset(worker)) {

390 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,	482 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,

391 "thread initialization failed.");	483 "thread initialization failed.");

392 }	484 }

393 worker->data1 = dec;	485 worker->data1 = dec;

394 worker->data2 = (void*)&dec->thread_ctx_.io_;	486 worker->data2 = (void*)&dec->thread_ctx_.io_;

395 worker->hook = (WebPWorkerHook)FinishRow;	487 worker->hook = (WebPWorkerHook)FinishRow;

396 dec->num_caches_ =	488 dec->num_caches_ =

397 (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;	489 (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;

398 } else {	490 } else {

399 dec->num_caches_ = ST_CACHE_LINES;	491 dec->num_caches_ = ST_CACHE_LINES;

400 }	492 }

401 return 1;	493 return 1;

402 }	494 }

403	495

	496 int VP8GetThreadMethod(const WebPDecoderOptions* const options,

	497 const WebPHeaderStructure* const headers,

	498 int width, int height) {

	499 if (options == NULL || options->use_threads == 0) {

	500 return 0;

	501 }

	502 (void)headers;

	503 (void)width;

	504 (void)height;

	505 assert(!headers->is_lossless);

	506 #if defined(WEBP_USE_THREAD)

	507 if (width < MIN_WIDTH_FOR_THREADS) return 0;

	508 // TODO(skal): tune the heuristic further

	509 #if 0

	510 if (height < 2 * width) return 2;

	511 #endif

	512 return 2;

	513 #else // !WEBP_USE_THREAD

	514 return 0;

	515 #endif

	516 }

	517

404 #undef MT_CACHE_LINES	518 #undef MT_CACHE_LINES

405 #undef ST_CACHE_LINES	519 #undef ST_CACHE_LINES

406	520

407 //------------------------------------------------------------------------------	521 //------------------------------------------------------------------------------

408 // Memory setup	522 // Memory setup

409	523

410 static int AllocateMemory(VP8Decoder* const dec) {	524 static int AllocateMemory(VP8Decoder* const dec) {

411 const int num_caches = dec->num_caches_;	525 const int num_caches = dec->num_caches_;

412 const int mb_w = dec->mb_w_;	526 const int mb_w = dec->mb_w_;

413 // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.	527 // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.

414 const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);	528 const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);

415 const size_t top_size = (16 + 8 + 8) * mb_w;	529 const size_t top_size = sizeof(VP8TopSamples) * mb_w;

416 const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);	530 const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);

417 const size_t f_info_size =	531 const size_t f_info_size =

418 (dec->filter_type_ > 0) ?	532 (dec->filter_type_ > 0) ?

419 mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)	533 mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)

420 : 0;	534 : 0;

421 const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);	535 const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);

422 const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);	536 const size_t mb_data_size =

	537 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);

423 const size_t cache_height = (16 * num_caches	538 const size_t cache_height = (16 * num_caches

424 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;	539 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;

425 const size_t cache_size = top_size * cache_height;	540 const size_t cache_size = top_size * cache_height;

426 // alpha_size is the only one that scales as width x height.	541 // alpha_size is the only one that scales as width x height.

427 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?	542 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?

428 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;	543 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;

429 const uint64_t needed = (uint64_t)intra_pred_mode_size	544 const uint64_t needed = (uint64_t)intra_pred_mode_size

430 + top_size + mb_info_size + f_info_size	545 + top_size + mb_info_size + f_info_size

431 + yuv_size + coeffs_size	546 + yuv_size + mb_data_size

432 + cache_size + alpha_size + ALIGN_MASK;	547 + cache_size + alpha_size + ALIGN_MASK;

433 uint8_t* mem;	548 uint8_t* mem;

434	549

435 if (needed != (size_t)needed) return 0; // check for overflow	550 if (needed != (size_t)needed) return 0; // check for overflow

436 if (needed > dec->mem_size_) {	551 if (needed > dec->mem_size_) {

437 free(dec->mem_);	552 free(dec->mem_);

438 dec->mem_size_ = 0;	553 dec->mem_size_ = 0;

439 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));	554 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));

440 if (dec->mem_ == NULL) {	555 if (dec->mem_ == NULL) {

441 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,	556 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,

442 "no memory during frame initialization.");	557 "no memory during frame initialization.");

443 }	558 }

444 // down-cast is ok, thanks to WebPSafeAlloc() above.	559 // down-cast is ok, thanks to WebPSafeAlloc() above.

445 dec->mem_size_ = (size_t)needed;	560 dec->mem_size_ = (size_t)needed;

446 }	561 }

447	562

448 mem = (uint8_t*)dec->mem_;	563 mem = (uint8_t*)dec->mem_;

449 dec->intra_t_ = (uint8_t*)mem;	564 dec->intra_t_ = (uint8_t*)mem;

450 mem += intra_pred_mode_size;	565 mem += intra_pred_mode_size;

451	566

452 dec->y_t_ = (uint8_t*)mem;	567 dec->yuv_t_ = (VP8TopSamples*)mem;

453 mem += 16 * mb_w;	568 mem += top_size;

454 dec->u_t_ = (uint8_t*)mem;

455 mem += 8 * mb_w;

456 dec->v_t_ = (uint8_t*)mem;

457 mem += 8 * mb_w;

458	569

459 dec->mb_info_ = ((VP8MB*)mem) + 1;	570 dec->mb_info_ = ((VP8MB*)mem) + 1;

460 mem += mb_info_size;	571 mem += mb_info_size;

461	572

462 dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;	573 dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;

463 mem += f_info_size;	574 mem += f_info_size;

464 dec->thread_ctx_.id_ = 0;	575 dec->thread_ctx_.id_ = 0;

465 dec->thread_ctx_.f_info_ = dec->f_info_;	576 dec->thread_ctx_.f_info_ = dec->f_info_;

466 if (dec->use_threads_) {	577 if (dec->mt_method_ > 0) {

467 // secondary cache line. The deblocking process need to make use of the	578 // secondary cache line. The deblocking process need to make use of the

468 // filtering strength from previous macroblock row, while the new ones	579 // filtering strength from previous macroblock row, while the new ones

469 // are being decoded in parallel. We'll just swap the pointers.	580 // are being decoded in parallel. We'll just swap the pointers.

470 dec->thread_ctx_.f_info_ += mb_w;	581 dec->thread_ctx_.f_info_ += mb_w;

471 }	582 }

472	583

473 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);	584 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);

474 assert((yuv_size & ALIGN_MASK) == 0);	585 assert((yuv_size & ALIGN_MASK) == 0);

475 dec->yuv_b_ = (uint8_t*)mem;	586 dec->yuv_b_ = (uint8_t*)mem;

476 mem += yuv_size;	587 mem += yuv_size;

477	588

478 dec->coeffs_ = (int16_t*)mem;	589 dec->mb_data_ = (VP8MBData*)mem;

479 mem += coeffs_size;	590 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;

	591 if (dec->mt_method_ == 2) {

	592 dec->thread_ctx_.mb_data_ += mb_w;

	593 }

	594 mem += mb_data_size;

480	595

481 dec->cache_y_stride_ = 16 * mb_w;	596 dec->cache_y_stride_ = 16 * mb_w;

482 dec->cache_uv_stride_ = 8 * mb_w;	597 dec->cache_uv_stride_ = 8 * mb_w;

483 {	598 {

484 const int extra_rows = kFilterExtraRows[dec->filter_type_];	599 const int extra_rows = kFilterExtraRows[dec->filter_type_];

485 const int extra_y = extra_rows * dec->cache_y_stride_;	600 const int extra_y = extra_rows * dec->cache_y_stride_;

486 const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;	601 const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;

487 dec->cache_y_ = ((uint8_t*)mem) + extra_y;	602 dec->cache_y_ = ((uint8_t*)mem) + extra_y;

488 dec->cache_u_ = dec->cache_y_	603 dec->cache_u_ = dec->cache_y_

489 + 16 * num_caches * dec->cache_y_stride_ + extra_uv;	604 + 16 * num_caches * dec->cache_y_stride_ + extra_uv;

490 dec->cache_v_ = dec->cache_u_	605 dec->cache_v_ = dec->cache_u_

491 + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;	606 + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;

492 dec->cache_id_ = 0;	607 dec->cache_id_ = 0;

493 }	608 }

494 mem += cache_size;	609 mem += cache_size;

495	610

496 // alpha plane	611 // alpha plane

497 dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;	612 dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;

498 mem += alpha_size;	613 mem += alpha_size;

499 assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);	614 assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);

500	615

501 // note: left-info is initialized once for all.	616 // note: left/top-info is initialized once for all.

502 memset(dec->mb_info_ - 1, 0, mb_info_size);	617 memset(dec->mb_info_ - 1, 0, mb_info_size);

	618 VP8InitScanline(dec); // initialize left too.

503	619

504 // initialize top	620 // initialize top

505 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);	621 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);

506	622

507 return 1;	623 return 1;

508 }	624 }

509	625

510 static void InitIo(VP8Decoder* const dec, VP8Io* io) {	626 static void InitIo(VP8Decoder* const dec, VP8Io* io) {

511 // prepare 'io'	627 // prepare 'io'

512 io->mb_y = 0;	628 io->mb_y = 0;

(...skipping 16 matching lines...) Expand all Loading...
529 //------------------------------------------------------------------------------	645 //------------------------------------------------------------------------------

530 // Main reconstruction function.	646 // Main reconstruction function.

531	647

532 static const int kScan[16] = {	648 static const int kScan[16] = {

533 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,	649 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,

534 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,	650 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,

535 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,	651 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,

536 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS	652 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS

537 };	653 };

538	654

539 static WEBP_INLINE int CheckMode(VP8Decoder* const dec, int mode) {	655 static int CheckMode(int mb_x, int mb_y, int mode) {

540 if (mode == B_DC_PRED) {	656 if (mode == B_DC_PRED) {

541 if (dec->mb_x_ == 0) {	657 if (mb_x == 0) {

542 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;	658 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;

543 } else {	659 } else {

544 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;	660 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;

545 }	661 }

546 }	662 }

547 return mode;	663 return mode;

548 }	664 }

549	665

550 static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {	666 static void Copy32b(uint8_t* dst, uint8_t* src) {

551 *(uint32_t*)dst = *(uint32_t*)src;	667 memcpy(dst, src, 4);

552 }	668 }

553	669

554 void VP8ReconstructBlock(VP8Decoder* const dec) {	670 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,

	671 uint8_t* const dst) {

	672 switch (bits >> 30) {

	673 case 3:

	674 VP8Transform(src, dst, 0);

	675 break;

	676 case 2:

	677 VP8TransformAC3(src, dst);

	678 break;

	679 case 1:

	680 VP8TransformDC(src, dst);

	681 break;

	682 default:

	683 break;

	684 }

	685 }

	686

	687 static void DoUVTransform(uint32_t bits, const int16_t* const src,

	688 uint8_t* const dst) {

	689 if (bits & 0xff) { // any non-zero coeff at all?

	690 if (bits & 0xaa) { // any non-zero AC coefficient?

	691 VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V

	692 } else {

	693 VP8TransformDCUV(src, dst);

	694 }

	695 }

	696 }

	697

	698 static void ReconstructRow(const VP8Decoder* const dec,

	699 const VP8ThreadContext* ctx) {

555 int j;	700 int j;

	701 int mb_x;

	702 const int mb_y = ctx->mb_y_;

	703 const int cache_id = ctx->id_;

556 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;	704 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;

557 uint8_t* const u_dst = dec->yuv_b_ + U_OFF;	705 uint8_t* const u_dst = dec->yuv_b_ + U_OFF;

558 uint8_t* const v_dst = dec->yuv_b_ + V_OFF;	706 uint8_t* const v_dst = dec->yuv_b_ + V_OFF;

	707 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {

	708 const VP8MBData* const block = ctx->mb_data_ + mb_x;

559	709

560 // Rotate in the left samples from previously decoded block. We move four	710 // Rotate in the left samples from previously decoded block. We move four

561 // pixels at a time for alignment reason, and because of in-loop filter.	711 // pixels at a time for alignment reason, and because of in-loop filter.

562 if (dec->mb_x_ > 0) {	712 if (mb_x > 0) {

563 for (j = -1; j < 16; ++j) {	713 for (j = -1; j < 16; ++j) {

564 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);	714 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);

	715 }

	716 for (j = -1; j < 8; ++j) {

	717 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);

	718 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);

	719 }

	720 } else {

	721 for (j = 0; j < 16; ++j) {

	722 y_dst[j * BPS - 1] = 129;

	723 }

	724 for (j = 0; j < 8; ++j) {

	725 u_dst[j * BPS - 1] = 129;

	726 v_dst[j * BPS - 1] = 129;

	727 }

	728 // Init top-left sample on left column too

	729 if (mb_y > 0) {

	730 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;

	731 }

565 }	732 }

566 for (j = -1; j < 8; ++j) {	733 {

567 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);	734 // bring top samples into the cache

568 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);	735 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;

569 }	736 const int16_t* const coeffs = block->coeffs_;

570 } else {	737 uint32_t bits = block->non_zero_y_;

571 for (j = 0; j < 16; ++j) {	738 int n;

572 y_dst[j * BPS - 1] = 129;

573 }

574 for (j = 0; j < 8; ++j) {

575 u_dst[j * BPS - 1] = 129;

576 v_dst[j * BPS - 1] = 129;

577 }

578 // Init top-left sample on left column too

579 if (dec->mb_y_ > 0) {

580 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;

581 }

582 }

583 {

584 // bring top samples into the cache

585 uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;

586 uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;

587 uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;

588 const int16_t* coeffs = dec->coeffs_;

589 int n;

590	739

591 if (dec->mb_y_ > 0) {	740 if (mb_y > 0) {

592 memcpy(y_dst - BPS, top_y, 16);	741 memcpy(y_dst - BPS, top_yuv[0].y, 16);

593 memcpy(u_dst - BPS, top_u, 8);	742 memcpy(u_dst - BPS, top_yuv[0].u, 8);

594 memcpy(v_dst - BPS, top_v, 8);	743 memcpy(v_dst - BPS, top_yuv[0].v, 8);

595 } else if (dec->mb_x_ == 0) {	744 } else if (mb_x == 0) {

596 // we only need to do this init once at block (0,0).	745 // we only need to do this init once at block (0,0).

597 // Afterward, it remains valid for the whole topmost row.	746 // Afterward, it remains valid for the whole topmost row.

598 memset(y_dst - BPS - 1, 127, 16 + 4 + 1);	747 memset(y_dst - BPS - 1, 127, 16 + 4 + 1);

599 memset(u_dst - BPS - 1, 127, 8 + 1);	748 memset(u_dst - BPS - 1, 127, 8 + 1);

600 memset(v_dst - BPS - 1, 127, 8 + 1);	749 memset(v_dst - BPS - 1, 127, 8 + 1);

601 }	750 }

602	751

603 // predict and add residuals	752 // predict and add residuals

	753 if (block->is_i4x4_) { // 4x4

	754 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);

604	755

605 if (dec->is_i4x4_) { // 4x4	756 if (mb_y > 0) {

606 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);	757 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border

	758 memset(top_right, top_yuv[0].y[15], sizeof(*top_right));

	759 } else {

	760 memcpy(top_right, top_yuv[1].y, sizeof(*top_right));

	761 }

	762 }

	763 // replicate the top-right pixels below

	764 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];

607	765

608 if (dec->mb_y_ > 0) {	766 // predict and add residuals for all 4x4 blocks in turn.

609 if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border	767 for (n = 0; n < 16; ++n, bits <<= 2) {

610 top_right[0] = top_y[15] * 0x01010101u;	768 uint8_t* const dst = y_dst + kScan[n];

611 } else {	769 VP8PredLuma4[block->imodes_[n]](dst);

612 memcpy(top_right, top_y + 16, sizeof(*top_right));	770 DoTransform(bits, coeffs + n * 16, dst);

613 }	771 }

614 }	772 } else { // 16x16

615 // replicate the top-right pixels below	773 const int pred_func = CheckMode(mb_x, mb_y,

616 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];	774 block->imodes_[0]);

617	775 VP8PredLuma16[pred_func](y_dst);

618 // predict and add residues for all 4x4 blocks in turn.	776 if (bits != 0) {

619 for (n = 0; n < 16; n++) {	777 for (n = 0; n < 16; ++n, bits <<= 2) {

620 uint8_t* const dst = y_dst + kScan[n];	778 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);

621 VP8PredLuma4[dec->imodes_[n]](dst);

622 if (dec->non_zero_ac_ & (1 << n)) {

623 VP8Transform(coeffs + n * 16, dst, 0);

624 } else if (dec->non_zero_ & (1 << n)) { // only DC is present

625 VP8TransformDC(coeffs + n * 16, dst);

626 }

627 }

628 } else { // 16x16

629 const int pred_func = CheckMode(dec, dec->imodes_[0]);

630 VP8PredLuma16[pred_func](y_dst);

631 if (dec->non_zero_) {

632 for (n = 0; n < 16; n++) {

633 uint8_t* const dst = y_dst + kScan[n];

634 if (dec->non_zero_ac_ & (1 << n)) {

635 VP8Transform(coeffs + n * 16, dst, 0);

636 } else if (dec->non_zero_ & (1 << n)) { // only DC is present

637 VP8TransformDC(coeffs + n * 16, dst);

638 }	779 }

639 }	780 }

640 }	781 }

641 }	782 {

642 {	783 // Chroma

643 // Chroma	784 const uint32_t bits_uv = block->non_zero_uv_;

644 const int pred_func = CheckMode(dec, dec->uvmode_);	785 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);

645 VP8PredChroma8[pred_func](u_dst);	786 VP8PredChroma8[pred_func](u_dst);

646 VP8PredChroma8[pred_func](v_dst);	787 VP8PredChroma8[pred_func](v_dst);

647	788 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);

648 if (dec->non_zero_ & 0x0f0000) { // chroma-U	789 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);

649 const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;

650 if (dec->non_zero_ac_ & 0x0f0000) {

651 VP8TransformUV(u_coeffs, u_dst);

652 } else {

653 VP8TransformDCUV(u_coeffs, u_dst);

654 }

655 }

656 if (dec->non_zero_ & 0xf00000) { // chroma-V

657 const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;

658 if (dec->non_zero_ac_ & 0xf00000) {

659 VP8TransformUV(v_coeffs, v_dst);

660 } else {

661 VP8TransformDCUV(v_coeffs, v_dst);

662 }

663 }	790 }

664	791

665 // stash away top samples for next block	792 // stash away top samples for next block

666 if (dec->mb_y_ < dec->mb_h_ - 1) {	793 if (mb_y < dec->mb_h_ - 1) {

667 memcpy(top_y, y_dst + 15 * BPS, 16);	794 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);

668 memcpy(top_u, u_dst + 7 * BPS, 8);	795 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);

669 memcpy(top_v, v_dst + 7 * BPS, 8);	796 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);

670 }	797 }

671 }	798 }

672 }	799 // Transfer reconstructed samples from yuv_b_ cache to final destination.

673 // Transfer reconstructed samples from yuv_b_ cache to final destination.	800 {

674 {	801 const int y_offset = cache_id * 16 * dec->cache_y_stride_;

675 const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;	802 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;

676 const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;	803 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;

677 uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;	804 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;

678 uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;	805 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;

679 uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;	806 for (j = 0; j < 16; ++j) {

680 for (j = 0; j < 16; ++j) {	807 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);

681 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);	808 }

682 }	809 for (j = 0; j < 8; ++j) {

683 for (j = 0; j < 8; ++j) {	810 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);

684 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);	811 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);

685 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);	812 }

686 }	813 }

687 }	814 }

688 }	815 }

689	816

690 //------------------------------------------------------------------------------	817 //------------------------------------------------------------------------------

691	818

692 #if defined(__cplusplus) || defined(c_plusplus)

693 } // extern "C"

694 #endif

OLD	NEW

« no previous file with comments | « third_party/libwebp/dec/decode_vp8.h ('k') | third_party/libwebp/dec/idec.c » ('j') | no next file with comments »